mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
OBC Branch Merge
This commit is contained in:
parent
d026a17b44
commit
bc06079664
7 changed files with 1726 additions and 808 deletions
|
@ -105,500 +105,6 @@ struct DiracWorkspace{T}
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
export DiracWorkspace, DiracParam
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Csw!(dws, U, gp, lp::SpaceParm)
|
|
||||||
|
|
||||||
Computes the clover and stores it in dws.csw.
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Fills `dws.csw` by launching `krnl_csw!` once per plane index
    # `1:lp.npls`, with `lp.bsz` threads and `lp.rsz` blocks per launch.
    # `gp.Ubnd` is forwarded to the kernel. Each launch is synchronised
    # before the next one starts.
    nplanes = Int(lp.npls)

    @timeit "Csw computation" begin
        for ipl in 1:nplanes
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, ipl, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
    # Device kernel: the thread at (b, r) = (threadIdx().x, blockIdx().x)
    # combines four products of links around its point in the plane
    # `lp.plidx[ipl]` and stores 0.125 times the sum of their `antsym`
    # parts in `csw[b,ipl,r]`.
    # NOTE(review): `up`/`dw`/`point_coord` are defined elsewhere; they are
    # presumably neighbour / coordinate lookups -- confirm.

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        it = point_coord((b,r), lp)[4]

        mu, nu = lp.plidx[ipl]
        # True only for SF boundary conditions and planes whose first direction is 4.
        sf_plane = (mu == 4) && ((B == BC_SF_AFWB) || (B == BC_SF_ORBI))

        # Neighbours: p = up, m = dw; e.g. (bpmmn, rpmmn) = up in mu, then dw in nu.
        bpm, rpm = up((b, r), mu, lp)
        bpn, rpn = up((b, r), nu, lp)
        bmm, rmm = dw((b, r), mu, lp)
        bmn, rmn = dw((b, r), nu, lp)
        bmmmn, rmmmn = dw((bmm, rmm), nu, lp)
        bpmmn, rpmmn = dw((bpm, rpm), nu, lp)
        bmmpn, rmmpn = up((bmm, rmm), nu, lp)

        # On the last time slice of an SF plane the links above come from `Ubnd`.
        from_bnd = sf_plane && (it == lp.iL[end])
        gt1 = from_bnd ? Ubnd[nu] : U[bpm,nu,rpm]
        gt2 = from_bnd ? Ubnd[nu] : U[bpmmn,nu,rpmmn]

        P1 = U[b,mu,r]*gt1/(U[b,nu,r]*U[bpn,mu,rpn])
        P2 = (U[bmn,nu,rmn]\(U[bmn,mu,rmn]*gt2))/U[b,mu,r]
        P3 = (U[bmmmn,nu,rmmmn]*U[bmm,mu,rmm])\(U[bmmmn,mu,rmmmn]*U[bmn,nu,rmn])
        P4 = (U[b,nu,r]/(U[bmm,nu,rmm]*U[bmmpn,mu,rmmpn]))*U[bmm,mu,rmm]

        # Nothing is written on the first time slice of an SF plane.
        if !sf_plane || (it != 1)
            csw[b,ipl,r] = 0.125*(antsym(P1)+antsym(P2)+antsym(P3)+antsym(P4))
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Computes the Dirac operator (with the Wilson term) ``D_w`` with gauge field `U` and parameters `dpar` applied to the field `si`, and stores the result in `so`.
|
|
||||||
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Picks the kernel from `dpar.csw`: above the 1.0E-10 threshold the
    # clover-improved kernel is launched with `dws.csw`, otherwise the
    # plain one. Either launch is synchronised and timed under "Dw".
    use_clover = abs(dpar.csw) > 1.0E-10

    @timeit "Dw" begin
        if use_clover
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Device kernel used by `Dw!` when the clover term is included. The
    # thread at (b, r) = (threadIdx().x, blockIdx().x) writes `so[b,r]`.
    # NOTE(review): `up`/`dw` are defined elsewhere; presumably they return
    # the index of the forward/backward neighbour -- confirm.

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    bup1, rup1 = up((b,r), 1, lp)
    bdn1, rdn1 = dw((b,r), 1, lp)
    bup2, rup2 = up((b,r), 2, lp)
    bdn2, rdn2 = dw((b,r), 2, lp)
    bup3, rup3 = up((b,r), 3, lp)
    bdn3, rdn3 = dw((b,r), 3, lp)
    bup4, rup4 = up((b,r), 4, lp)
    bdn4, rdn4 = dw((b,r), 4, lp)

    @inbounds begin
        # Site-local part: (4+m0) term, im*tm*Gamma{5} term and the six Fcsw terms.
        mass  = (4+m0)*si[b,r]
        twist = im*tm*dmul(Gamma{5},si[b,r])
        c1 = Fcsw[b,1,r]*dmul(Gamma{10},si[b,r])
        c2 = Fcsw[b,2,r]*dmul(Gamma{11},si[b,r])
        c3 = Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
        c4 = Fcsw[b,4,r]*dmul(Gamma{15},si[b,r])
        c5 = Fcsw[b,5,r]*dmul(Gamma{14},si[b,r])
        c6 = Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])
        so[b,r] = mass + twist + 0.5*csw*im*(c1 + c2 + c3 + c4 + c5 + c6)

        # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Device kernel used by `Dw!` when the clover term is switched off. The
    # thread at (b, r) = (threadIdx().x, blockIdx().x) writes `so[b,r]`.
    # NOTE(review): `up`/`dw` are defined elsewhere; presumably they return
    # the index of the forward/backward neighbour -- confirm.

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    bup1, rup1 = up((b,r), 1, lp)
    bdn1, rdn1 = dw((b,r), 1, lp)
    bup2, rup2 = up((b,r), 2, lp)
    bdn2, rdn2 = dw((b,r), 2, lp)
    bup3, rup3 = up((b,r), 3, lp)
    bdn3, rdn3 = dw((b,r), 3, lp)
    bup4, rup4 = up((b,r), 4, lp)
    bdn4, rdn4 = dw((b,r), 4, lp)

    @inbounds begin
        # Site-local part: (4+m0) term and im*tm*Gamma{5} term.
        mass  = (4+m0)*si[b,r]
        twist = im*tm*dmul(Gamma{5},si[b,r])
        so[b,r] = mass + twist

        # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Method for the two SF boundary-condition types. Same kernel choice
    # as the generic method (threshold 1.0E-10 on `dpar.csw`), but
    # `dpar.ct` is passed to the kernels as well.
    use_clover = abs(dpar.csw) > 1.0E-10

    @timeit "Dw" begin
        if use_clover
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # SF variant of the clover-improved kernel. Points whose `point_time`
    # is 1 are left untouched; points at time 2 or `lp.iL[4]` get an extra
    # `(ct-1.0)*si[b,r]` contribution.

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    it = point_time((b,r),lp)

    # Guard: nothing is computed or written for it == 1.
    if it == 1
        return nothing
    end

    bup1, rup1 = up((b,r), 1, lp)
    bdn1, rdn1 = dw((b,r), 1, lp)
    bup2, rup2 = up((b,r), 2, lp)
    bdn2, rdn2 = dw((b,r), 2, lp)
    bup3, rup3 = up((b,r), 3, lp)
    bdn3, rdn3 = dw((b,r), 3, lp)
    bup4, rup4 = up((b,r), 4, lp)
    bdn4, rdn4 = dw((b,r), 4, lp)

    @inbounds begin
        # Site-local part: (4+m0) term, im*tm*Gamma{5} term and the six Fcsw terms.
        mass  = (4+m0)*si[b,r]
        twist = im*tm*dmul(Gamma{5},si[b,r])
        c1 = Fcsw[b,1,r]*dmul(Gamma{10},si[b,r])
        c2 = Fcsw[b,2,r]*dmul(Gamma{11},si[b,r])
        c3 = Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
        c4 = Fcsw[b,4,r]*dmul(Gamma{15},si[b,r])
        c5 = Fcsw[b,5,r]*dmul(Gamma{14},si[b,r])
        c6 = Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])
        so[b,r] = mass + twist + 0.5*csw*im*(c1 + c2 + c3 + c4 + c5 + c6)

        # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        # Extra ct-dependent term on time slices 2 and lp.iL[4].
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # SF variant of the unimproved kernel. Points whose `point_time` is 1
    # are left untouched; points at time 2 or `lp.iL[4]` get an extra
    # `(ct-1.0)*si[b,r]` contribution.

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    it = point_time((b,r),lp)

    # Guard: nothing is computed or written for it == 1.
    if it == 1
        return nothing
    end

    bup1, rup1 = up((b,r), 1, lp)
    bdn1, rdn1 = dw((b,r), 1, lp)
    bup2, rup2 = up((b,r), 2, lp)
    bdn2, rdn2 = dw((b,r), 2, lp)
    bup3, rup3 = up((b,r), 3, lp)
    bdn3, rdn3 = dw((b,r), 3, lp)
    bup4, rup4 = up((b,r), 4, lp)
    bdn4, rdn4 = dw((b,r), 4, lp)

    @inbounds begin
        # Site-local part: (4+m0) term and im*tm*Gamma{5} term.
        mass  = (4+m0)*si[b,r]
        twist = im*tm*dmul(Gamma{5},si[b,r])
        so[b,r] = mass + twist

        # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        # Extra ct-dependent term on time slices 2 and lp.iL[4].
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Computes ``\\gamma_5`` times the Dirac operator (with the Wilson term) with gauge field `U` and parameters `dpar` applied to the field `si`, and stores the result in `so`.
|
|
||||||
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
|
||||||
"""
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Picks the kernel from `dpar.csw`: above the 1.0E-10 threshold the
    # clover-improved kernel is launched with `dws.csw`, otherwise the
    # plain one. Either launch is synchronised and timed under "g5Dw".
    use_clover = abs(dpar.csw) > 1.0E-10

    @timeit "g5Dw" begin
        if use_clover
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Device kernel used by `g5Dw!`/`DwdagDw!` when the clover term is
    # included. The thread at (b, r) = (threadIdx().x, blockIdx().x) first
    # builds the operator without the `tm` term in `so[b,r]`, then replaces
    # it by Gamma{5} times that value plus `im*tm*si[b,r]`.

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    bup1, rup1 = up((b,r), 1, lp)
    bdn1, rdn1 = dw((b,r), 1, lp)
    bup2, rup2 = up((b,r), 2, lp)
    bdn2, rdn2 = dw((b,r), 2, lp)
    bup3, rup3 = up((b,r), 3, lp)
    bdn3, rdn3 = dw((b,r), 3, lp)
    bup4, rup4 = up((b,r), 4, lp)
    bdn4, rdn4 = dw((b,r), 4, lp)

    @inbounds begin
        # Site-local part: (4+m0) term and the six Fcsw terms.
        mass = (4+m0)*si[b,r]
        c1 = Fcsw[b,1,r]*dmul(Gamma{10},si[b,r])
        c2 = Fcsw[b,2,r]*dmul(Gamma{11},si[b,r])
        c3 = Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
        c4 = Fcsw[b,4,r]*dmul(Gamma{15},si[b,r])
        c5 = Fcsw[b,5,r]*dmul(Gamma{14},si[b,r])
        c6 = Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])
        so[b,r] = mass + 0.5*csw*im*(c1 + c2 + c3 + c4 + c5 + c6)

        # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        # Gamma{5} multiplication; the tm term is added afterwards.
        so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Device kernel used by `g5Dw!`/`DwdagDw!` when the clover term is
    # switched off. The thread at (b, r) = (threadIdx().x, blockIdx().x)
    # first builds the operator without the `tm` term in `so[b,r]`, then
    # replaces it by Gamma{5} times that value plus `im*tm*si[b,r]`.

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    bup1, rup1 = up((b,r), 1, lp)
    bdn1, rdn1 = dw((b,r), 1, lp)
    bup2, rup2 = up((b,r), 2, lp)
    bdn2, rdn2 = dw((b,r), 2, lp)
    bup3, rup3 = up((b,r), 3, lp)
    bdn3, rdn3 = dw((b,r), 3, lp)
    bup4, rup4 = up((b,r), 4, lp)
    bdn4, rdn4 = dw((b,r), 4, lp)

    @inbounds begin
        # Site-local part: (4+m0) term only.
        so[b,r] = (4+m0)*si[b,r]

        # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        # Gamma{5} multiplication; the tm term is added afterwards.
        so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
    end

    return nothing
end
|
|
||||||
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Method for the two SF boundary-condition types. Same kernel choice
    # as the generic method (threshold 1.0E-10 on `dpar.csw`), but
    # `dpar.ct` is passed to the kernels as well.
    use_clover = abs(dpar.csw) > 1.0E-10

    @timeit "g5Dw" begin
        if use_clover
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # SF variant of the clover-improved gamma5 kernel. The operator is
    # only built for points whose `point_time` differs from 1; the final
    # Gamma{5} step runs for every point.

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    it = point_time((b,r),lp)

    if it != 1
        bup1, rup1 = up((b,r), 1, lp)
        bdn1, rdn1 = dw((b,r), 1, lp)
        bup2, rup2 = up((b,r), 2, lp)
        bdn2, rdn2 = dw((b,r), 2, lp)
        bup3, rup3 = up((b,r), 3, lp)
        bdn3, rdn3 = dw((b,r), 3, lp)
        bup4, rup4 = up((b,r), 4, lp)
        bdn4, rdn4 = dw((b,r), 4, lp)

        @inbounds begin
            # Site-local part: (4+m0) term and the six Fcsw terms.
            mass = (4+m0)*si[b,r]
            c1 = Fcsw[b,1,r]*dmul(Gamma{10},si[b,r])
            c2 = Fcsw[b,2,r]*dmul(Gamma{11},si[b,r])
            c3 = Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
            c4 = Fcsw[b,4,r]*dmul(Gamma{15},si[b,r])
            c5 = Fcsw[b,5,r]*dmul(Gamma{14},si[b,r])
            c6 = Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])
            so[b,r] = mass + 0.5*csw*im*(c1 + c2 + c3 + c4 + c5 + c6)

            # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
            fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
            bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
            fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
            bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
            fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
            bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
            fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
            bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
            so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

            # Extra ct-dependent term on time slices 2 and lp.iL[4].
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # Runs for all points (outside @inbounds, as before).
    # NOTE(review): for it == 1 this kernel has not written so[b,r], so the
    # value read here is whatever the caller left in `so`. `DwdagDw!` calls
    # `SF_bndfix!` afterwards; `g5Dw!` does not -- confirm this is intended.
    so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # SF variant of the unimproved gamma5 kernel. The operator is only
    # built for points whose `point_time` differs from 1; the final
    # Gamma{5} step runs for every point.

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    it = point_time((b,r),lp)

    if it != 1
        bup1, rup1 = up((b,r), 1, lp)
        bdn1, rdn1 = dw((b,r), 1, lp)
        bup2, rup2 = up((b,r), 2, lp)
        bdn2, rdn2 = dw((b,r), 2, lp)
        bup3, rup3 = up((b,r), 3, lp)
        bdn3, rdn3 = dw((b,r), 3, lp)
        bup4, rup4 = up((b,r), 4, lp)
        bdn4, rdn4 = dw((b,r), 4, lp)

        @inbounds begin
            # Site-local part: (4+m0) term only.
            so[b,r] = (4+m0)*si[b,r]

            # Neighbour part; the summation order (fw1, bw1, fw2, ...) is kept as is.
            fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bup1,rup1])
            bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bdn1,1,rdn1],si[bdn1,rdn1])
            fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bup2,rup2])
            bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bdn2,2,rdn2],si[bdn2,rdn2])
            fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bup3,rup3])
            bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bdn3,3,rdn3],si[bdn3,rdn3])
            fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bup4,rup4])
            bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bdn4,4,rdn4],si[bdn4,rdn4])
            so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

            # Extra ct-dependent term on time slices 2 and lp.iL[4].
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # Runs for all points (outside @inbounds, as before).
    # NOTE(review): for it == 1 this kernel has not written so[b,r], so the
    # value read here is whatever the caller left in `so`. `DwdagDw!` calls
    # `SF_bndfix!` afterwards; `g5Dw!` does not -- confirm this is intended.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Applies the operator ``\\gamma_5 D_w`` twice to `si` and stores the result in `so`. This is equivalent to applying the operator ``D_w^\\dagger D_w``.
|
|
||||||
The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
|
|
||||||
"""
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Method for the two SF boundary-condition types. Two gamma5 kernel
    # launches are chained through the scratch field `dws.st`: the first
    # uses `dpar.tm`, the second `-dpar.tm`. `SF_bndfix!` is applied to the
    # output of each launch. The kernel pair is chosen by the 1.0E-10
    # threshold on `dpar.csw`.
    use_clover = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin
        if use_clover
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        else
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Two gamma5 kernel launches are chained through the scratch field
    # `dws.st`: the first uses `dpar.tm`, the second `-dpar.tm`. The
    # kernel pair is chosen by the 1.0E-10 threshold on `dpar.csw`.
    use_clover = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin
        if use_clover
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
            end
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
            end
        else
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
            end
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
            end
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
function mtwmdpar(dpar::DiracParam)
|
function mtwmdpar(dpar::DiracParam)
|
||||||
|
@ -610,108 +116,19 @@ function mtwmdpar(dpar::DiracParam{P,R}) where {P,R}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
"""
|
export DiracWorkspace, DiracParam, mtwmdpar
|
||||||
SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
|
|
||||||
|
|
||||||
Sets all the values of `sp` in the first time slice to zero.
|
include("Diracfields.jl")
|
||||||
"""
|
export SF_bndfix!, Csw!, pfrandomize!
|
||||||
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Launches `krnl_sfbndfix!` over `sp` (lp.bsz threads, lp.rsz blocks),
    # waits for it, and records the time under "SF boundary fix".
    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
    end
    return nothing
end
|
|
||||||
|
|
||||||
function krnl_sfbndfix!(sp,lp::SpaceParm)
    # Device kernel: multiplies `sp[b,r]` by 0.0 when the point (b, r)
    # has `point_time` equal to 1; all other points are left alone.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Guard: only the first time slice is modified.
    point_time((b,r),lp) == 1 || return nothing

    sp[b,r] = 0.0*sp[b,r]
    return nothing
end
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
|
|
||||||
|
|
||||||
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
|
||||||
"""
|
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T}
    # Draws four real normal arrays of size (lp.bsz, 3, lp.rsz, 2) on the
    # device and lets `krnl_assign_pf_su3!` combine them into `f`.
    @timeit "Randomize pseudofermion field" begin
        # The last axis holds real and imaginary parts separately:
        # complex generation not supported for Julia 1.5.4
        p = ntuple(_ -> CUDA.randn(T, lp.bsz, 3, lp.rsz, 2), 4)
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
    # Device kernel: builds a `Spinor` of `SU3fund` entries for the point
    # (b, r) from the arrays in `p`, using index 1 of the last axis as the
    # real part and index 2 as the imaginary part. With `t == 0` every
    # point is written; otherwise only points whose `point_time` equals `t`.

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        # `point_time` is only evaluated when t != 0 (short-circuit).
        if (t == 0) || (point_time((b,r),lp) == t)
            f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
                                           x[b,2,r,1] + im* x[b,2,r,2],
                                           x[b,3,r,1] + im* x[b,3,r,2]),p))
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}},lp::SpaceParm, t::Int64=0) where {T}
    # Draws four real normal arrays of size (lp.bsz, 2, lp.rsz, 2) on the
    # device and lets `krnl_assign_pf_su2!` combine them into `f`.
    @timeit "Randomize pseudofermion field" begin
        # The last axis holds real and imaginary parts separately:
        # complex generation not supported for Julia 1.5.4
        p = ntuple(_ -> CUDA.randn(T, lp.bsz, 2, lp.rsz, 2), 4)
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
    # Device kernel: builds a `Spinor` of `SU2fund` entries for the point
    # (b, r) from the arrays in `p`, using index 1 of the last axis as the
    # real part and index 2 as the imaginary part. With `t == 0` every
    # point is written; otherwise only points whose `point_time` equals `t`.

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        # `point_time` is only evaluated when t != 0 (short-circuit).
        if (t == 0) || (point_time((b,r),lp) == t)
            f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
                                           x[b,2,r,1] + im* x[b,2,r,2]),p))
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
|
|
||||||
|
|
||||||
|
include("Diracoper.jl")
|
||||||
|
export Dw!, g5Dw!, DwdagDw!
|
||||||
|
|
||||||
include("DiracIO.jl")
|
include("DiracIO.jl")
|
||||||
export read_prop, save_prop, read_dpar
|
export read_prop, save_prop, read_dpar
|
||||||
|
|
||||||
include("Diracflow.jl")
|
include("Diracflow.jl")
|
||||||
export Dslash_sq!, flw, backflow
|
export Nablanabla!, Dslash_sq!, flw, backflow
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
211
src/Dirac/Diracfields.jl
Normal file
211
src/Dirac/Diracfields.jl
Normal file
|
@ -0,0 +1,211 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Csw!(dws, U, gp, lp::SpaceParm)
|
||||||
|
|
||||||
|
Computes the clover and stores it in dws.csw.
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Fills `dws.csw` by launching `krnl_csw!` once per plane index
    # `1:lp.npls`, with `lp.bsz` threads and `lp.rsz` blocks per launch.
    # `gp.Ubnd` is forwarded to the kernel. Each launch is synchronised
    # before the next one starts.
    nplanes = Int(lp.npls)

    @timeit "Csw computation" begin
        for ipl in 1:nplanes
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, ipl, lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
    # Device kernel: the thread at (b, r) = (threadIdx().x, blockIdx().x)
    # combines four products of links around its point in the plane
    # `lp.plidx[ipl]` and stores 0.125 times the sum of their `antsym`
    # parts in `csw[b,ipl,r]`. Some boundary points are skipped (see below).
    # NOTE(review): `up`/`dw`/`point_coord` are defined elsewhere; they are
    # presumably neighbour / coordinate lookups -- confirm.

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        it = point_coord((b,r), lp)[4]

        mu, nu = lp.plidx[ipl]
        # True only for SF boundary conditions and planes whose first direction is 4.
        sf_plane = (mu == 4) && ((B == BC_SF_AFWB) || (B == BC_SF_ORBI))
        # True on the first and last time slice when B is BC_OPEN.
        open_edge = (B == BC_OPEN) && ((it == 1) || (it == lp.iL[end]))

        # Neighbours: p = up, m = dw; e.g. (bpmmn, rpmmn) = up in mu, then dw in nu.
        bpm, rpm = up((b, r), mu, lp)
        bpn, rpn = up((b, r), nu, lp)
        bmm, rmm = dw((b, r), mu, lp)
        bmn, rmn = dw((b, r), nu, lp)
        bmmmn, rmmmn = dw((bmm, rmm), nu, lp)
        bpmmn, rpmmn = dw((bpm, rpm), nu, lp)
        bmmpn, rmmpn = up((bmm, rmm), nu, lp)

        # On the last time slice of an SF plane the links above come from `Ubnd`.
        from_bnd = sf_plane && (it == lp.iL[end])
        gt1 = from_bnd ? Ubnd[nu] : U[bpm,nu,rpm]
        gt2 = from_bnd ? Ubnd[nu] : U[bpmmn,nu,rpmmn]

        P1 = U[b,mu,r]*gt1/(U[b,nu,r]*U[bpn,mu,rpn])
        P2 = (U[bmn,nu,rmn]\(U[bmn,mu,rmn]*gt2))/U[b,mu,r]
        P3 = (U[bmmmn,nu,rmmmn]*U[bmm,mu,rmm])\(U[bmmmn,mu,rmmmn]*U[bmn,nu,rmn])
        P4 = (U[b,nu,r]/(U[bmm,nu,rmm]*U[bmmpn,mu,rmmpn]))*U[bmm,mu,rmm]

        # Nothing is written on the first time slice of an SF plane, nor on
        # the open-boundary edges.
        skip = (sf_plane && (it == 1)) || open_edge
        if !skip
            csw[b,ipl,r] = 0.125*(antsym(P1)+antsym(P2)+antsym(P3)+antsym(P4))
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
|
||||||
|
|
||||||
|
Sets all the values of `sp` in the first time slice to zero.
|
||||||
|
"""
|
||||||
|
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Launches `krnl_sfbndfix!` over `sp` (lp.bsz threads, lp.rsz blocks),
    # waits for it, and records the time under "SF boundary fix".
    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
    end
    return nothing
end
|
||||||
|
|
||||||
|
function krnl_sfbndfix!(sp,lp::SpaceParm)
    # Device kernel: multiplies `sp[b,r]` by 0.0 when the point (b, r)
    # has `point_time` equal to 1; all other points are left alone.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Guard: only the first time slice is modified.
    point_time((b,r),lp) == 1 || return nothing

    sp[b,r] = 0.0*sp[b,r]
    return nothing
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D})
|
||||||
|
|
||||||
|
Sets all the values of `sp` in the first and last time slice to zero.
|
||||||
|
"""
|
||||||
|
function SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # Open-boundary method: launches `krnl_opbndfix!` over `sp` (lp.bsz
    # threads, lp.rsz blocks), waits for it, and records the time under
    # the same "SF boundary fix" label as the SF method.
    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_opbndfix!(sp, lp)
    end
    return nothing
end
|
||||||
|
|
||||||
|
function krnl_opbndfix!(sp,lp::SpaceParm)
    # Device kernel: multiplies `sp[b,r]` by 0.0 when the point (b, r) has
    # `point_time` equal to 1 or to `lp.iL[end]`; other points are left alone.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    on_edge = (it == 1) || (it == lp.iL[end])

    if on_edge
        sp[b,r] = 0.0*sp[b,r]
    end
    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
|
||||||
|
|
||||||
|
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
||||||
|
"""
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
    # Periodic-boundary method: draws four real normal arrays of size
    # (lp.bsz, 3, lp.rsz, 2), divides them by sqrt(2), and lets
    # `krnl_assign_pf_su3!` combine them into `f`.
    @timeit "Randomize pseudofermion field" begin
        # The last axis holds real and imaginary parts separately:
        # complex generation not supported for Julia 1.5.4
        gauss = ntuple(_ -> CUDA.randn(T, lp.bsz, 3, lp.rsz, 2), 4)
        p = gauss ./ sqrt(2)
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
    end

    return nothing
end
|
||||||
|
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
    # SF / open-boundary method: same random fill as the periodic method,
    # followed by `SF_bndfix!(f, lp)` on the freshly filled field.
    @timeit "Randomize pseudofermion field" begin
        # The last axis holds real and imaginary parts separately:
        # complex generation not supported for Julia 1.5.4
        gauss = ntuple(_ -> CUDA.randn(T, lp.bsz, 3, lp.rsz, 2), 4)
        p = gauss ./ sqrt(2)
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
    end
    SF_bndfix!(f,lp)

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
    # Device kernel: builds a `Spinor` of `SU3fund` entries for the point
    # (b, r) from the arrays in `p`, using index 1 of the last axis as the
    # real part and index 2 as the imaginary part. With `t == 0` every
    # point is written; otherwise only points whose `point_time` equals `t`.

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        # `point_time` is only evaluated when t != 0 (short-circuit).
        if (t == 0) || (point_time((b,r),lp) == t)
            f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
                                           x[b,2,r,1] + im* x[b,2,r,2],
                                           x[b,3,r,1] + im* x[b,3,r,2]),p))
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
    # Periodic-boundary method for SU2fund fields: draws four real normal
    # arrays, divides them by sqrt(2), and lets `krnl_assign_pf_su2!`
    # combine them into `f`.
    #
    # The second axis has length 2 because `krnl_assign_pf_su2!` only reads
    # components 1 and 2 of it. It was previously allocated with length 3,
    # so a third of the generated numbers were never used.
    # NOTE(review): for a fixed RNG seed the individual values written to
    # `f` differ from the length-3 layout; the distribution is unchanged.
    @timeit "Randomize pseudofermion field" begin
        # The last axis holds real and imaginary parts separately:
        # complex generation not supported for Julia 1.5.4
        p = ntuple(i->CUDA.randn(T, lp.bsz, 2, lp.rsz,2),4)./sqrt(2)
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# Fills the SU(2) pseudofermion field `f` with complex Gaussian noise for
# lattices with SF or open boundary conditions, then calls `SF_bndfix!` on `f`
# (presumably to reset the boundary time slices -- see its definition).
# `t == 0` randomizes every site; otherwise only the sites with time
# coordinate `t` are assigned by the kernel.
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}

    @timeit "Randomize pseudofermion field" begin
        # One real array per Dirac component; the last axis holds the real and
        # imaginary parts (complex generation not supported for Julia 1.5.4).
        # sqrt(T(2)) keeps the samples in precision T: a literal sqrt(2) is a
        # Float64 and would promote Float32 noise to Float64.
        # NOTE(review): krnl_assign_pf_su2! only reads colour slots 1 and 2, so
        # the third slot generated here looks unused; it is kept so that the
        # consumption of the random stream is unchanged.
        p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz, 2), 4) ./ sqrt(T(2))

        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
        end
    end

    SF_bndfix!(f,lp)

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel: builds one SU(2) spinor per thread from the real noise arrays in
# `p` (one array per Dirac component; last axis = real / imaginary part) and
# stores it in `f[b,r]`. With `t == 0` every site is assigned; otherwise only
# the sites whose time coordinate equals `t`.
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        # Short-circuit: the time coordinate is only computed when t != 0.
        if (t == 0) || (point_time((b,r),lp) == t)
            f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
                                           x[b,2,r,1] + im* x[b,2,r,2]), p))
        end
    end

    return nothing
end
|
|
@ -30,7 +30,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
|
||||||
|
|
||||||
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
||||||
U .= expm.(U, ymws.mom, 2*eps)
|
U .= expm.(U, ymws.mom, 2*eps)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam,
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
|
|
||||||
for j in dsave:-1:1
|
for j in dsave:-1:1
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
for k in 1:j-1
|
for k in 1:j-1
|
||||||
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
||||||
end
|
end
|
||||||
|
@ -154,83 +154,6 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
|
||||||
|
|
||||||
@timeit "Laplacian" begin
|
|
||||||
CUDA.@sync begin
|
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return nothing
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
|
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
|
||||||
|
|
||||||
@inbounds begin
|
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
|
||||||
end
|
|
||||||
|
|
||||||
return nothing
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
|
||||||
|
|
||||||
@inbounds begin
|
|
||||||
|
|
||||||
if (point_time((b,r),lp) != 1)
|
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return nothing
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
|
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
|
||||||
|
|
||||||
|
@ -278,13 +201,123 @@ end
|
||||||
flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw_adapt(U, psi, int, tend, int.eps_ini, gp, dpar, lp, ymws, dws)
|
flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw_adapt(U, psi, int, tend, int.eps_ini, gp, dpar, lp, ymws, dws)
|
||||||
|
|
||||||
|
|
||||||
|
"""
    function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})

Applies the operator ``\\nabla^* \\nabla`` (as implemented by `krnl_Nablanabla`) to `si`,
using the gauge field `U` and the phases `dpar.th`, and stores the result in `so`.

"""
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # One thread per lattice site; block until the kernel has finished.
    @timeit "Laplacian" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
    end

    return nothing
end
|
||||||
|
# SF / open boundary variant: `SF_bndfix!` is applied to the input `si` before
# the kernel launch (so `si` is modified in place) and to the output `so`
# afterwards.
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}

    SF_bndfix!(si,lp)

    # One thread per lattice site; block until the kernel has finished.
    @timeit "Laplacian" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
    end

    SF_bndfix!(so,lp)

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
# CUDA kernel (open boundaries): for every site that is not on the first or
# last time slice, writes -4*si plus half the sum of the forward and backward
# gauge-transported neighbours (weighted by the phases `th`) into `so`.
# Sites on the two boundary time slices are left untouched.
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        it = point_time((b,r),lp)
        ((it == 1) || (it == lp.iL[end])) && return nothing

        so[b,r] = -4*si[b,r]

        x = (b,r)
        bu1, ru1 = up(x, 1, lp)
        bd1, rd1 = dw(x, 1, lp)
        bu2, ru2 = up(x, 2, lp)
        bd2, rd2 = dw(x, 2, lp)
        bu3, ru3 = up(x, 3, lp)
        bd3, rd3 = dw(x, 3, lp)
        bu4, ru4 = up(x, 4, lp)
        bd4, rd4 = dw(x, 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
                         th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
                         th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
                         th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
    end

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel (periodic boundaries): for every site, writes -4*si plus half the
# sum of the forward and backward gauge-transported neighbours (weighted by the
# phases `th`) into `so`.
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        so[b,r] = -4*si[b,r]

        x = (b,r)
        bu1, ru1 = up(x, 1, lp)
        bd1, rd1 = dw(x, 1, lp)
        bu2, ru2 = up(x, 2, lp)
        bd2, rd2 = dw(x, 2, lp)
        bu3, ru3 = up(x, 3, lp)
        bd3, rd3 = dw(x, 3, lp)
        bu4, ru4 = up(x, 4, lp)
        bd4, rd4 = dw(x, 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
                         th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
                         th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
                         th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
    end

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel (SF boundaries): for every site that is not on the first time
# slice, writes -4*si plus half the sum of the forward and backward
# gauge-transported neighbours (weighted by the phases `th`) into `so`.
# Sites on the first time slice are left untouched.
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        (point_time((b,r),lp) == 1) && return nothing

        so[b,r] = -4*si[b,r]

        x = (b,r)
        bu1, ru1 = up(x, 1, lp)
        bd1, rd1 = dw(x, 1, lp)
        bu2, ru2 = up(x, 2, lp)
        bd2, rd2 = dw(x, 2, lp)
        bu3, ru3 = up(x, 3, lp)
        bd3, rd3 = dw(x, 3, lp)
        bu4, ru4 = up(x, 4, lp)
        bd4, rd4 = dw(x, 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
                         th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
                         th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
                         th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||||
|
|
||||||
Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
|
Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
|
||||||
|
@ -292,40 +325,40 @@ Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
|
||||||
"""
|
"""
|
||||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
@timeit "DwdagDw" begin
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -349,12 +382,12 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -369,19 +402,19 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
||||||
|
|
||||||
so[b,r] = 4*si[b,r]
|
so[b,r] = 4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
|
@ -393,11 +426,11 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -409,15 +442,15 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORB
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
if (point_time((b,r),lp) != 1)
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
664
src/Dirac/Diracoper.jl
Normal file
664
src/Dirac/Diracoper.jl
Normal file
|
@ -0,0 +1,664 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## OPEN
|
||||||
|
|
||||||
|
"""
    function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})

Computes the Dirac operator (with the Wilson term) ``D_w`` with gauge field `U` and parameters `dpar` of the field `si` and stores it in `so`.
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.

"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # Open boundaries: the input is boundary-fixed in place before the launch.
    SF_bndfix!(si,lp)

    @timeit "Dw" begin
        if abs(dpar.csw) > 1.0E-10
            # Clover-improved operator
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    SF_bndfix!(so,lp)

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel (open boundaries): clover-improved Wilson operator with
# twisted-mass term, written for every site that is not on the first or last
# time slice. On the time slices next to the boundaries (t == 2 and
# t == lp.iL[4]-1) an extra (ct-1)*si term is added.
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    ((it == 1) || (it == lp.iL[end])) && return nothing

    x = (b,r)
    bu1, ru1 = up(x, 1, lp)
    bd1, rd1 = dw(x, 1, lp)
    bu2, ru2 = up(x, 2, lp)
    bd2, rd2 = dw(x, 2, lp)
    bu3, ru3 = up(x, 3, lp)
    bd3, rd3 = dw(x, 3, lp)
    bu4, ru4 = up(x, 4, lp)
    bd4, rd4 = dw(x, 4, lp)

    @inbounds begin

        # Site-diagonal part: mass, twisted mass and clover term
        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                                              Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))

        # Hopping part
        so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                        th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                        th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                        th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        if (it == 2) || (it == (lp.iL[4]-1))
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel (open boundaries): Wilson operator with twisted-mass term,
# written for every site that is not on the first or last time slice. On the
# time slices next to the boundaries (t == 2 and t == lp.iL[4]-1) an extra
# (ct-1)*si term is added.
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)
    ((it == 1) || (it == lp.iL[end])) && return nothing

    x = (b,r)
    bu1, ru1 = up(x, 1, lp)
    bd1, rd1 = dw(x, 1, lp)
    bu2, ru2 = up(x, 2, lp)
    bd2, rd2 = dw(x, 2, lp)
    bu3, ru3 = up(x, 3, lp)
    bd3, rd3 = dw(x, 3, lp)
    bu4, ru4 = up(x, 4, lp)
    bd4, rd4 = dw(x, 4, lp)

    @inbounds begin

        # Site-diagonal part: mass and twisted mass
        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])

        # Hopping part
        so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                        th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                        th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                        th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

        if (it == 2) || (it == (lp.iL[4]-1))
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
    function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})

Computes ``\\gamma_5`` times the Dirac operator (with the Wilson term) with gauge field `U` and parameters `dpar` of the field `si` and stores it in `so`.
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # Open boundaries: the input is boundary-fixed in place before the launch.
    SF_bndfix!(si,lp)

    @timeit "g5Dw" begin
        if abs(dpar.csw) > 1.0E-10
            # Clover-improved operator
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    SF_bndfix!(so,lp)

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel (open boundaries): gamma_5 times the clover-improved Wilson
# operator plus the twisted-mass term im*tm*si, written for every site that is
# not on the first or last time slice. On the time slices next to the
# boundaries (t == 2 and t == lp.iL[4]-1) an extra (ct-1)*si term is added.
#
# The final gamma_5 multiplication is performed inside the interior-site guard
# (and inside @inbounds), so boundary sites of `so` are neither read nor
# written by this kernel; the callers g5Dw! and DwdagDw! apply SF_bndfix! to
# the output afterwards.
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)

    if (it != 1) && (it != lp.iL[end])

        bu1, ru1 = up((b,r), 1, lp)
        bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp)
        bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp)
        bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp)
        bd4, rd4 = dw((b,r), 4, lp)

        @inbounds begin

            # Site-diagonal part: mass and clover term
            so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                    Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))

            # Hopping part
            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end

            # gamma_5 on the accumulated result, then the twisted-mass term
            so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel (open boundaries): gamma_5 times the Wilson operator plus the
# twisted-mass term im*tm*si, written for every site that is not on the first
# or last time slice. On the time slices next to the boundaries (t == 2 and
# t == lp.iL[4]-1) an extra (ct-1)*si term is added.
#
# The final gamma_5 multiplication is performed inside the interior-site guard
# (and inside @inbounds), so boundary sites of `so` are neither read nor
# written by this kernel; the callers g5Dw! and DwdagDw! apply SF_bndfix! to
# the output afterwards.
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    it = point_time((b,r),lp)

    if (it != 1) && (it != lp.iL[end])

        bu1, ru1 = up((b,r), 1, lp)
        bd1, rd1 = dw((b,r), 1, lp)
        bu2, ru2 = up((b,r), 2, lp)
        bd2, rd2 = dw((b,r), 2, lp)
        bu3, ru3 = up((b,r), 3, lp)
        bd3, rd3 = dw((b,r), 3, lp)
        bu4, ru4 = up((b,r), 4, lp)
        bd4, rd4 = dw((b,r), 4, lp)

        @inbounds begin

            # Site-diagonal mass term
            so[b,r] = (4+m0)*si[b,r]

            # Hopping part
            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )

            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end

            # gamma_5 on the accumulated result, then the twisted-mass term
            so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})

Applies the operator ``\\gamma_5 D_w`` twice to `si` and stores the result in `so`. This is equivalent to applying the operator ``D_w^\\dagger D_w``.
The second application is done with the opposite sign of `dpar.tm`, and `dws.st` is used as intermediate storage.
The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}

    # Open boundaries: the input is boundary-fixed in place before the launches.
    SF_bndfix!(si,lp)

    @timeit "DwdagDw" begin
        if abs(dpar.csw) > 1.0E-10
            # Clover-improved operator: si -> dws.st -> so
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)

            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        else
            # Unimproved operator: si -> dws.st -> so
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)

            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
## PERIODIC
|
||||||
|
|
||||||
|
# Periodic boundary conditions: applies the Wilson-Dirac operator to `si` and
# stores the result in `so`. The clover-improved kernel (reading `dws.csw`) is
# used when |dpar.csw| exceeds 1.0E-10.
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    @timeit "Dw" begin
        if abs(dpar.csw) > 1.0E-10
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# CUDA kernel (periodic boundaries): clover-improved Wilson operator with
# twisted-mass term, written for every site.
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    x = (b,r)
    bu1, ru1 = up(x, 1, lp)
    bd1, rd1 = dw(x, 1, lp)
    bu2, ru2 = up(x, 2, lp)
    bd2, rd2 = dw(x, 2, lp)
    bu3, ru3 = up(x, 3, lp)
    bd3, rd3 = dw(x, 3, lp)
    bu4, ru4 = up(x, 4, lp)
    bd4, rd4 = dw(x, 4, lp)

    @inbounds begin

        # Site-diagonal part: mass, twisted mass and clover term
        so[b,r] = (4+m0)*si[b,r]+ im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                                             Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))

        # Hopping part
        so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                        th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                        th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                        th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel (one thread per lattice point) for periodic boundary conditions.
Writes into `so[b,r]` the site-diagonal terms
`(4+m0)*si[b,r] + im*tm*Gamma{5}*si[b,r]` minus `0.5` times the
nearest-neighbour terms along directions 1..4, weighted by `th[mu]` (upper
neighbour) and `conj(th[mu])` (lower neighbour).
"""
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # Point handled by this thread: (thread index, block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    x = (b, r)

    # Neighbours along directions 1..4: `p` = result of `up`, `m` = result of `dw`.
    bp1, rp1 = up(x, 1, lp)
    bm1, rm1 = dw(x, 1, lp)
    bp2, rp2 = up(x, 2, lp)
    bm2, rm2 = dw(x, 2, lp)
    bp3, rp3 = up(x, 3, lp)
    bm3, rm3 = dw(x, 3, lp)
    bp4, rp4 = up(x, 4, lp)
    bm4, rm4 = dw(x, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Site-diagonal part.
        so[b,r] = (4+m0)*psi + im*tm*dmul(Gamma{5},psi)

        # Nearest-neighbour part.
        hop = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
              th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
              th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
              th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])

        so[b,r] -= 0.5*hop
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D})

Launch the g5Dw kernel for periodic boundary conditions, writing the result into `so`.

When `abs(dpar.csw) > 1.0E-10` the kernel `krnl_g5Dwimpr!` is used (it also
receives `dws.csw` and `dpar.csw`); otherwise `krnl_g5Dw!` is used. The launch uses
`lp.bsz` threads and `lp.rsz` blocks, is synchronised with `CUDA.@sync`, and is
timed under the label `"g5Dw"`.
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # A csw below this threshold is treated as "no improvement term".
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "g5Dw" begin
        CUDA.@sync begin
            if improved
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
            else
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
            end
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel (one thread per lattice point) for periodic boundary conditions.
First accumulates in `so[b,r]` the terms `(4+m0)*si[b,r]`, the `Fcsw` term
(prefactor `0.5*csw*im`) and minus `0.5` times the nearest-neighbour terms; then
replaces `so[b,r]` by `Gamma{5}*so[b,r] + im*tm*si[b,r]`.
"""
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # Point handled by this thread: (thread index, block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    x = (b, r)

    # Neighbours along directions 1..4: `p` = result of `up`, `m` = result of `dw`.
    bp1, rp1 = up(x, 1, lp)
    bm1, rm1 = dw(x, 1, lp)
    bp2, rp2 = up(x, 2, lp)
    bm2, rm2 = dw(x, 2, lp)
    bp3, rp3 = up(x, 3, lp)
    bm3, rm3 = dw(x, 3, lp)
    bp4, rp4 = up(x, 4, lp)
    bm4, rm4 = dw(x, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Term built from the six components of Fcsw (note the 15,14,13 ordering).
        clv = Fcsw[b,1,r]*dmul(Gamma{10},psi) + Fcsw[b,2,r]*dmul(Gamma{11},psi) + Fcsw[b,3,r]*dmul(Gamma{12},psi) +
              Fcsw[b,4,r]*dmul(Gamma{15},psi) + Fcsw[b,5,r]*dmul(Gamma{14},psi) + Fcsw[b,6,r]*dmul(Gamma{13},psi)

        # Site-diagonal part (the tm term is added after the Gamma{5} multiplication).
        so[b,r] = (4+m0)*psi + 0.5*csw*im*clv

        # Nearest-neighbour part.
        hop = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
              th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
              th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
              th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])

        so[b,r] -= 0.5*hop

        # Multiply by Gamma{5} and add the tm term.
        so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D})

CUDA kernel (one thread per lattice point) for periodic boundary conditions.
First accumulates in `so[b,r]` the term `(4+m0)*si[b,r]` minus `0.5` times the
nearest-neighbour terms; then replaces `so[b,r]` by
`Gamma{5}*so[b,r] + im*tm*si[b,r]`.
"""
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # Point handled by this thread: (thread index, block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    x = (b, r)

    # Neighbours along directions 1..4: `p` = result of `up`, `m` = result of `dw`.
    bp1, rp1 = up(x, 1, lp)
    bm1, rm1 = dw(x, 1, lp)
    bp2, rp2 = up(x, 2, lp)
    bm2, rm2 = dw(x, 2, lp)
    bp3, rp3 = up(x, 3, lp)
    bm3, rm3 = dw(x, 3, lp)
    bp4, rp4 = up(x, 4, lp)
    bm4, rm4 = dw(x, 4, lp)

    @inbounds begin
        # Site-diagonal part (the tm term is added after the Gamma{5} multiplication).
        so[b,r] = (4+m0)*si[b,r]

        # Nearest-neighbour part.
        hop = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
              th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
              th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
              th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])

        so[b,r] -= 0.5*hop

        # Multiply by Gamma{5} and add the tm term.
        so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D})

Apply the g5Dw kernel twice for periodic boundary conditions:

1. `si -> dws.st` with twisted mass `dpar.tm`,
2. `dws.st -> so` with twisted mass `-dpar.tm`.

When `abs(dpar.csw) > 1.0E-10` the kernel `krnl_g5Dwimpr!` is used for both steps,
otherwise `krnl_g5Dw!`. Each launch is synchronised with `CUDA.@sync` and timed
under `"g5Dw"`, nested inside an outer `"DwdagDw"` timer. `dws.st` is overwritten.
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    # A csw below this threshold is treated as "no improvement term".
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        # First application: si -> dws.st, with +tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if improved
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
                end
            end
        end

        # Second application: dws.st -> so, with -tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if improved
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
                end
            end
        end

    end

    return nothing
end
|
||||||
|
|
||||||
|
## SF
|
||||||
|
|
||||||
|
"""
    Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

Launch the Dw kernel for the SF boundary conditions, writing the result into `so`.

`SF_bndfix!(si, lp)` is called on the input first, so `si` is modified in place.
When `abs(dpar.csw) > 1.0E-10` the kernel `krnl_Dwimpr!` is used (it also receives
`dws.csw` and `dpar.csw`); otherwise `krnl_Dw!` is used. Both receive `dpar.ct`.
The launch is synchronised with `CUDA.@sync` and timed under the label `"Dw"`.
"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The kernels assume a fixed boundary value of si (see the comment in the kernels).
    SF_bndfix!(si,lp)

    # A csw below this threshold is treated as "no improvement term".
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "Dw" begin
        CUDA.@sync begin
            if improved
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel (one thread per lattice point) for the SF boundary conditions.
Points with `point_time == 1` are left untouched. Elsewhere `so[b,r]` receives the
site-diagonal terms (`(4+m0)`, `im*tm*Gamma{5}` and the `Fcsw` term with prefactor
`0.5*csw*im`) minus `0.5` times the nearest-neighbour terms. On the time slices
`2` and `lp.iL[4]` the extra term `(ct-1.0)*si[b,r]` is added.
"""
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    # Point handled by this thread: (thread index, block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    x = (b, r)

    # Nothing is written on the time slice with index 1.
    it = point_time(x, lp)
    if it == 1
        return nothing
    end

    # Neighbours along directions 1..4: `p` = result of `up`, `m` = result of `dw`.
    bp1, rp1 = up(x, 1, lp)
    bm1, rm1 = dw(x, 1, lp)
    bp2, rp2 = up(x, 2, lp)
    bm2, rm2 = dw(x, 2, lp)
    bp3, rp3 = up(x, 3, lp)
    bm3, rm3 = dw(x, 3, lp)
    bp4, rp4 = up(x, 4, lp)
    bm4, rm4 = dw(x, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Term built from the six components of Fcsw (note the 15,14,13 ordering).
        clv = Fcsw[b,1,r]*dmul(Gamma{10},psi) + Fcsw[b,2,r]*dmul(Gamma{11},psi) + Fcsw[b,3,r]*dmul(Gamma{12},psi) +
              Fcsw[b,4,r]*dmul(Gamma{15},psi) + Fcsw[b,5,r]*dmul(Gamma{14},psi) + Fcsw[b,6,r]*dmul(Gamma{13},psi)

        # Site-diagonal part.
        so[b,r] = (4+m0)*psi + im*tm*dmul(Gamma{5},psi) + 0.5*csw*im*clv

        # Nearest-neighbour part.
        hop = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
              th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
              th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
              th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])

        so[b,r] -= 0.5*hop

        # Extra ct-dependent term on the time slices 2 and lp.iL[4].
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel (one thread per lattice point) for the SF boundary conditions.
Points with `point_time == 1` are left untouched. Elsewhere `so[b,r]` receives
`(4+m0)*si[b,r] + im*tm*Gamma{5}*si[b,r]` minus `0.5` times the nearest-neighbour
terms. On the time slices `2` and `lp.iL[4]` the extra term `(ct-1.0)*si[b,r]` is
added.
"""
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    # Point handled by this thread: (thread index, block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    x = (b, r)

    # Nothing is written on the time slice with index 1.
    it = point_time(x, lp)
    if it == 1
        return nothing
    end

    # Neighbours along directions 1..4: `p` = result of `up`, `m` = result of `dw`.
    bp1, rp1 = up(x, 1, lp)
    bm1, rm1 = dw(x, 1, lp)
    bp2, rp2 = up(x, 2, lp)
    bm2, rm2 = dw(x, 2, lp)
    bp3, rp3 = up(x, 3, lp)
    bm3, rm3 = dw(x, 3, lp)
    bp4, rp4 = up(x, 4, lp)
    bm4, rm4 = dw(x, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Site-diagonal part.
        so[b,r] = (4+m0)*psi + im*tm*dmul(Gamma{5},psi)

        # Nearest-neighbour part.
        hop = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
              th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
              th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
              th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])

        so[b,r] -= 0.5*hop

        # Extra ct-dependent term on the time slices 2 and lp.iL[4].
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
    g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

Launch the g5Dw kernel for the SF boundary conditions, writing the result into `so`.

`SF_bndfix!(si, lp)` is called on the input first, so `si` is modified in place.
When `abs(dpar.csw) > 1.0E-10` the kernel `krnl_g5Dwimpr!` is used (it also
receives `dws.csw` and `dpar.csw`); otherwise `krnl_g5Dw!` is used. Both receive
`dpar.ct`. The launch is synchronised with `CUDA.@sync` and timed under `"g5Dw"`.
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The kernels assume a fixed boundary value of si (see the comment in the kernels).
    SF_bndfix!(si,lp)

    # A csw below this threshold is treated as "no improvement term".
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "g5Dw" begin
        CUDA.@sync begin
            if improved
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel (one thread per lattice point) for the SF boundary conditions.
For points with `point_time != 1`, `so[b,r]` first receives `(4+m0)*si[b,r]`, the
`Fcsw` term (prefactor `0.5*csw*im`), minus `0.5` times the nearest-neighbour
terms, plus `(ct-1.0)*si[b,r]` on the time slices `2` and `lp.iL[4]`.
Afterwards, for every point, `so[b,r]` is replaced by
`Gamma{5}*so[b,r] + im*tm*si[b,r]`.
"""
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    # Point handled by this thread: (thread index, block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    x = (b, r)

    it = point_time(x, lp)

    if it != 1

        # Neighbours along directions 1..4: `p` = result of `up`, `m` = result of `dw`.
        bp1, rp1 = up(x, 1, lp)
        bm1, rm1 = dw(x, 1, lp)
        bp2, rp2 = up(x, 2, lp)
        bm2, rm2 = dw(x, 2, lp)
        bp3, rp3 = up(x, 3, lp)
        bm3, rm3 = dw(x, 3, lp)
        bp4, rp4 = up(x, 4, lp)
        bm4, rm4 = dw(x, 4, lp)

        @inbounds begin
            psi = si[b,r]

            # Term built from the six components of Fcsw (note the 15,14,13 ordering).
            clv = Fcsw[b,1,r]*dmul(Gamma{10},psi) + Fcsw[b,2,r]*dmul(Gamma{11},psi) + Fcsw[b,3,r]*dmul(Gamma{12},psi) +
                  Fcsw[b,4,r]*dmul(Gamma{15},psi) + Fcsw[b,5,r]*dmul(Gamma{14},psi) + Fcsw[b,6,r]*dmul(Gamma{13},psi)

            # Site-diagonal part (the tm term is added after the Gamma{5} multiplication).
            so[b,r] = (4+m0)*psi + 0.5*csw*im*clv

            # Nearest-neighbour part.
            hop = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                  th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                  th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                  th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])

            so[b,r] -= 0.5*hop

            # Extra ct-dependent term on the time slices 2 and lp.iL[4].
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end

    end

    # NOTE(review): this line also runs for points with time index 1, where so[b,r]
    # was not written above, and it sits outside @inbounds. Confirm both are intended.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

CUDA kernel (one thread per lattice point) for the SF boundary conditions.
For points with `point_time != 1`, `so[b,r]` first receives `(4+m0)*si[b,r]` minus
`0.5` times the nearest-neighbour terms, plus `(ct-1.0)*si[b,r]` on the time slices
`2` and `lp.iL[4]`. Afterwards, for every point, `so[b,r]` is replaced by
`Gamma{5}*so[b,r] + im*tm*si[b,r]`.
"""
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    # Point handled by this thread: (thread index, block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    x = (b, r)

    it = point_time(x, lp)

    if it != 1

        # Neighbours along directions 1..4: `p` = result of `up`, `m` = result of `dw`.
        bp1, rp1 = up(x, 1, lp)
        bm1, rm1 = dw(x, 1, lp)
        bp2, rp2 = up(x, 2, lp)
        bm2, rm2 = dw(x, 2, lp)
        bp3, rp3 = up(x, 3, lp)
        bm3, rm3 = dw(x, 3, lp)
        bp4, rp4 = up(x, 4, lp)
        bm4, rm4 = dw(x, 4, lp)

        @inbounds begin
            # Site-diagonal part (the tm term is added after the Gamma{5} multiplication).
            so[b,r] = (4+m0)*si[b,r]

            # Nearest-neighbour part.
            hop = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                  th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                  th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                  th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4])

            so[b,r] -= 0.5*hop

            # Extra ct-dependent term on the time slices 2 and lp.iL[4].
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end

    end

    # NOTE(review): this line also runs for points with time index 1, where so[b,r]
    # was not written above, and it sits outside @inbounds. Confirm both are intended.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
||||||
|
|
||||||
|
"""
    DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})

Apply the g5Dw kernel twice for the SF boundary conditions:

1. `si -> dws.st` with twisted mass `dpar.tm`, followed by `SF_bndfix!(dws.st, lp)`,
2. `dws.st -> so` with twisted mass `-dpar.tm`, followed by `SF_bndfix!(so, lp)`.

When `abs(dpar.csw) > 1.0E-10` the kernel `krnl_g5Dwimpr!` is used for both steps,
otherwise `krnl_g5Dw!`. Each launch is synchronised with `CUDA.@sync` and timed
under `"g5Dw"`, nested inside an outer `"DwdagDw"` timer. `dws.st` is overwritten.
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # NOTE(review): unlike the SF methods of Dw! and g5Dw!, SF_bndfix! is not applied
    # to `si` before the first kernel. Confirm callers pass a boundary-fixed field.

    # A csw below this threshold is treated as "no improvement term".
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        # First application: si -> dws.st, with +tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if improved
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                end
            end
        end
        SF_bndfix!(dws.st,lp)

        # Second application: dws.st -> so, with -tm.
        @timeit "g5Dw" begin
            CUDA.@sync begin
                if improved
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                else
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
                end
            end
        end
        SF_bndfix!(so,lp)

    end

    return nothing
end
|
582
src/YM/YMact.jl
582
src/YM/YMact.jl
|
@ -9,7 +9,11 @@
|
||||||
### created: Mon Jul 12 18:31:19 2021
|
### created: Mon Jul 12 18:31:19 2021
|
||||||
###
|
###
|
||||||
|
|
||||||
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,NB,N,M,B,D}
|
|
||||||
|
##
|
||||||
|
## OPEN
|
||||||
|
##
|
||||||
|
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -21,7 +25,318 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
for id1 in N:-1:1
|
for id1 in N:-1:1
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1==N)
|
TOBC = (id1==N)
|
||||||
|
|
||||||
|
for id2 = 1:id1-1
|
||||||
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
|
ipl = ipl + 1
|
||||||
|
|
||||||
|
TWP = (I[id1]==1) && (I[id2]==1)
|
||||||
|
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
||||||
|
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
|
||||||
|
|
||||||
|
# H2 staple
|
||||||
|
(b1, r1) = up((b,r), id1, lp)
|
||||||
|
(b2, r2) = up((b1,r1), id1, lp)
|
||||||
|
gb = U[b2,id2,r2]
|
||||||
|
|
||||||
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
|
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
|
||||||
|
|
||||||
|
# H3 staple
|
||||||
|
(b1, r1) = up((b,r), id2, lp)
|
||||||
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
|
|
||||||
|
(b3, r3) = up((b1,r1), id1, lp)
|
||||||
|
|
||||||
|
gc = U[b3,id2,r3]
|
||||||
|
|
||||||
|
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
|
||||||
|
# END staples
|
||||||
|
|
||||||
|
ga = U[bu1,id2,ru1]
|
||||||
|
|
||||||
|
g2 = U[b,id2,r]\U[b,id1,r]
|
||||||
|
|
||||||
|
if ( (it == lp.iL[end]) || (it == 1) ) && !TOBC
|
||||||
|
S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2]) + c1*tr(g2*ga/h3) + c1*tr(g2*h2/U[bu2,id1,ru2]))
|
||||||
|
elseif (it == lp.iL[end]-1) && TOBC
|
||||||
|
S += c0*tr(g2*ga/U[bu2,id1,ru2]) + c1*tr(g2*ga/h3)
|
||||||
|
elseif (it == lp.iL[end]) && TOBC
|
||||||
|
nothing
|
||||||
|
else
|
||||||
|
if TWP
|
||||||
|
S += (ztw[ipl]*c0)*tr(g2*ga/U[bu2,id1,ru2])
|
||||||
|
else
|
||||||
|
S += c0*tr(g2*ga/U[bu2,id1,ru2])
|
||||||
|
end
|
||||||
|
if TWH2
|
||||||
|
S += (ztw[ipl]*c1)*tr(g2*h2/U[bu2,id1,ru2])
|
||||||
|
else
|
||||||
|
S += c1*tr(g2*h2/U[bu2,id1,ru2])
|
||||||
|
end
|
||||||
|
if TWH3
|
||||||
|
S += (ztw[ipl]*c1)*tr(g2*ga/h3)
|
||||||
|
else
|
||||||
|
S += c1*tr(g2*ga/h3)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
plx[I] = S
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
    krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D})

CUDA kernel (one thread per lattice point) for `BC_OPEN`. Sums over all planes
`(id1, id2)` with `id2 < id1` the trace of the plaquette at this point and stores
the sum in `plx[I]`, where `I = point_coord((b,r), lp)`. With `it = I[N]`:

- planes not containing direction `N`, on the slices `it == 1` or
  `it == lp.iL[end]`: the trace is weighted by `0.5*cG`;
- planes containing direction `N`, on the slice `it == lp.iL[end]`: no contribution;
- otherwise: the trace is weighted by `ztw[ipl]` when both coordinates of the
  plane equal 1, and by one otherwise.

`Ubnd` is not used by this method.
"""
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        I = point_coord((b,r), lp)
        it = I[N]

        tlast = lp.iL[end]
        on_edge = (it == tlast) || (it == 1)

        S = zero(eltype(plx))
        ipl = 0
        for id1 in N:-1:1
            bu1, ru1 = up((b, r), id1, lp)
            TOBC = (id1==N)

            for id2 in 1:(id1-1)
                bu2, ru2 = up((b, r), id2, lp)
                ipl += 1

                # Planes containing direction N give nothing on the last slice.
                if TOBC && (it == tlast)
                    continue
                end

                TWP = (I[id1]==1) && (I[id2]==1)
                gt1 = U[bu1,id2,ru1]
                plq = tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))

                if on_edge && !TOBC
                    S += 0.5*cG*plq
                elseif TWP
                    S += ztw[ipl]*plq
                else
                    S += plq
                end
            end
        end

        plx[I] = S
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D})

CUDA kernel (one thread per lattice point) for `BC_OPEN`. Accumulates the
plaquette contribution of plane `ipl` (directions `id1, id2 = lp.plidx[ipl]`) to
the four links of the plaquette: the two links at this point go into `frc1`, the
two links at the neighbouring points go into `frc2`. With `it = I[N]`:

- `id1 != N` and `it` equal to `1` or `lp.iL[end]`: all terms are weighted by `0.5*cG`;
- `id1 == N` and `it == lp.iL[end]`: nothing is accumulated;
- otherwise: `projalg` is called with `ztw` as first argument when both
  coordinates of the plane equal 1, and without it otherwise.

`Ubnd` is not used by this method.
"""
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    I = point_coord((b,r), lp)
    it = I[N]

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        bu1, ru1 = up((b, r), id1, lp)
        bu2, ru2 = up((b, r), id2, lp)

        TOBC  = (id1 == N)
        tlast = lp.iL[end]

        # Planes containing direction N give nothing on the last slice.
        if TOBC && (it == tlast)
            return nothing
        end

        TWP = (I[id1]==1)&&(I[id2]==1)

        gt1 = U[bu1,id2,ru1]
        g1  = gt1/U[bu2,id1,ru2]
        g2  = U[b,id2,r]\U[b,id1,r]

        if !TOBC && ( (it == 1) || (it == tlast) )
            # Planes without direction N on the first/last slice: weight 0.5*cG.
            X = 0.5*cG*projalg(U[b,id1,r]*g1/U[b,id2,r])

            frc1[b,id1,r] -= X
            frc1[b,id2,r] += X
            frc2[bu1,id2,ru1] -= 0.5*cG*projalg(g1*g2)
            frc2[bu2,id1,ru2] += 0.5*cG*projalg(g2*g1)
        elseif TWP
            X = projalg(ztw,U[b,id1,r]*g1/U[b,id2,r])

            frc2[bu1,id2,ru1] -= projalg(ztw,g1*g2)
            frc2[bu2,id1,ru2] += projalg(ztw,g2*g1)
            frc1[b,id1,r] -= X
            frc1[b,id2,r] += X
        else
            X = projalg(U[b,id1,r]*g1/U[b,id2,r])

            frc2[bu1,id2,ru1] -= projalg(g1*g2)
            frc2[bu2,id1,ru2] += projalg(g2*g1)
            frc1[b,id1,r] -= X
            frc1[b,id2,r] += X
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D})

CUDA kernel (one thread per lattice point) for `BC_OPEN`. Accumulates into `frc1`
(links at this point) and `frc2` (links at the neighbouring points) the
contributions of plane `ipl` (directions `id1, id2 = lp.plidx[ipl]`): the plaquette
term weighted by `c0` and the four terms built from the staples `h1`...`h4`
weighted by `c1`. With `it = I[N]` the cases are:

- `id1 != N` and `it` equal to `1` or `lp.iL[end]`: every term carries `0.5*cG`;
- `id1 == N` and `it == lp.iL[end]`: nothing is accumulated;
- `id1 == N` and `it == 1`: the terms involving `h4` are left out;
- `id1 == N` and `it == lp.iL[end]-1`: the terms involving `h2` are left out;
- otherwise: all terms; `projalg` receives `c0*ztw` / `ztw*c1` as first argument
  when the corresponding `TW*` flag is set.

`Ubnd` is not used by this method.
"""
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    # Point handled by this thread and its coordinate along direction N.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    I = point_coord((b,r), lp)
    it = I[N]

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        bu1, ru1 = up((b, r), id1, lp)
        bu2, ru2 = up((b, r), id2, lp)

        # TOBC: the plane contains direction N.
        TOBC = (id1 == N)
        # Flags selecting the terms that get the `ztw` factor.
        TWP = (I[id1]==1) && (I[id2]==1)
        TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
        TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
        TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
        TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )

        # H1 staple (starts one step down in direction id2)
        (b1, r1) = dw((b,r), id2, lp)
        (b2, r2) = up((b1,r1), id1, lp)
        gc = U[b2,id2,r2]
        h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc

        # H2 staple (starts one step up in direction id1)
        (b1, r1) = up((b,r), id1, lp)
        (b2, r2) = up((b1,r1), id1, lp)
        gb = U[b2,id2,r2]

        (b2, r2) = up((b1,r1), id2, lp)
        h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]

        # H3 staple (starts one step up in direction id2)
        (b1, r1) = up((b,r), id2, lp)
        (b2, r2) = up((b1,r1), id2, lp)
        (b3, r3) = up((b1,r1), id1, lp)
        gc = U[b3,id2,r3]
        h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc

        # H4 staple (starts one step down in direction id1)
        (b1, r1) = dw((b,r), id1, lp)
        (b2, r2) = up((b1,r1), id2, lp)
        h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2]
        # END staples

        ga = U[bu1,id2,ru1]

        g1 = ga/U[bu2,id1,ru2]
        g2 = U[b,id2,r]\U[b,id1,r]

        if !TOBC && ( (it == 1) || (it == lp.iL[end]) )
            # Plane without direction N on the first/last slice: everything weighted by 0.5*cG.
            X = 0.5*cG*(c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) )

            frc1[b,id1,r] -= X + 0.5*cG*c1*projalg(U[b,id1,r]*g1/h4)
            frc1[b,id2,r] += X + 0.5*cG*c1*projalg(h1*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= 0.5*cG*c0*projalg(g1*g2)
            frc2[bu2,id1,ru2] += 0.5*cG*c0*projalg(g2*g1)
            frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((g1/U[b,id2,r])*h1)
            frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg((U[b,id2,r]\h1)*g1)
            frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg(g2*h2/U[bu2,id1,ru2])
            frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((ga/h3)*g2)
            frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((g1/h4)*U[b,id1,r])
            frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg(h4\U[b,id1,r]*g1)
        elseif TOBC && (it == lp.iL[end])
            # Plane containing direction N on the last slice: no contribution.
            nothing
        elseif TOBC && (it == 1)
            # Plane containing direction N on the first slice: the h4 terms are left out.
            X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))

            frc1[b,id1,r] -= X
            frc1[b,id2,r] += X + c1*projalg(h1*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
            frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
            frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
            frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
            frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2])
            frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
        elseif TOBC && (it == (lp.iL[end]-1) )
            # Plane containing direction N on the next-to-last slice: the h2 terms are left out.
            X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))

            frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4)
            frc1[b,id2,r] += X + c1*projalg(h1*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
            frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
            frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
            frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
            frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
            frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
            frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
        else
            # All remaining points: every term, each with or without the ztw factor.

            # Plaquette term (weight c0).
            if TWP
                X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r])
                frc2[bu1,id2,ru1] -= projalg(c0*ztw,g1*g2)
                frc2[bu2,id1,ru2] += projalg(c0*ztw,g2*g1)
            else
                X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r])
                frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
                frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
            end
            # Term built from h1 (weight c1).
            if TWH1
                frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r])
                frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
                frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1)
            else
                frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r])
                frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
                frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
            end
            # Term built from h2 (weight c1); its part for the links at this point goes into X.
            if TWH2
                X += projalg(ztw*c1,U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
                frc2[bu2,id1,ru2] += projalg(ztw*c1,g2*h2/U[bu2,id1,ru2])
            else
                X += c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
                frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2])
            end
            # Term built from h3 (weight c1); its part for the links at this point goes into X.
            if TWH3
                X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3))
                frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
            else
                X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
                frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
            end
            # Term built from h4 (weight c1).
            if TWH4
                frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
                frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r])
                frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
            else
                frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
                frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
                frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
            end
            # X enters the two links at this point with opposite signs.
            frc1[b,id1,r] -= X
            frc1[b,id2,r] += X

        end

    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
## SF
|
||||||
|
##
|
||||||
|
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,NB,N,M,D}
|
||||||
|
|
||||||
|
b = Int64(CUDA.threadIdx().x)
|
||||||
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
I = point_coord((b,r), lp)
|
||||||
|
it = I[N]
|
||||||
|
|
||||||
|
ipl = 0
|
||||||
|
S = zero(eltype(plx))
|
||||||
|
@inbounds begin
|
||||||
|
for id1 in N:-1:1
|
||||||
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
|
SFBC = (id1==N)
|
||||||
|
|
||||||
for id2 = 1:id1-1
|
for id2 = 1:id1-1
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
|
@ -95,7 +410,7 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
@ -103,14 +418,13 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
I = point_coord((b,r), lp)
|
I = point_coord((b,r), lp)
|
||||||
it = I[N]
|
it = I[N]
|
||||||
IBND = ( ( (B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) &&
|
IBND = ( (it == 1) || (it == lp.iL[end]))
|
||||||
( (it == 1) || (it == lp.iL[end])) )
|
|
||||||
|
|
||||||
S = zero(eltype(plx))
|
S = zero(eltype(plx))
|
||||||
ipl = 0
|
ipl = 0
|
||||||
for id1 in N:-1:1
|
for id1 in N:-1:1
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
SFBND = IBND && (id1 == N)
|
SFBND = IBND && (id1 == N)
|
||||||
|
|
||||||
for id2 = 1:id1-1
|
for id2 = 1:id1-1
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
|
@ -141,7 +455,7 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -154,7 +468,7 @@ function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw,
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
TWP = (I[id1]==1)&&(I[id2]==1)
|
TWP = (I[id1]==1)&&(I[id2]==1)
|
||||||
|
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N)
|
SFBC = (id1 == N)
|
||||||
|
|
||||||
if SFBC && (it == lp.iL[end])
|
if SFBC && (it == lp.iL[end])
|
||||||
gt1 = Ubnd[id2]
|
gt1 = Ubnd[id2]
|
||||||
|
@ -195,7 +509,7 @@ function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw,
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -207,7 +521,7 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
|
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N)
|
SFBC = (id1 == N)
|
||||||
TWP = (I[id1]==1) && (I[id2]==1)
|
TWP = (I[id1]==1) && (I[id2]==1)
|
||||||
TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
|
TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
|
||||||
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
||||||
|
@ -334,6 +648,253 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
## PERIODIC
|
||||||
|
##
|
||||||
|
"""
    krnl_impr!(plx, U, c0, c1, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D})

CUDA kernel, periodic boundary conditions. For the lattice point handled by this
thread (`b` = thread index, `r` = block index) it loops over all planes
`(id1, id2)` with `id2 < id1` and accumulates

- `c0 * tr(plaquette)`,
- `c1 * tr(term built from the H2 staple)`,
- `c1 * tr(term built from the H3 staple)`,

writing the sum to `plx[I]`, where `I = point_coord((b, r), lp)`.

Each term is additionally multiplied by `ztw[ipl]` when its twist condition
(`TWP`, `TWH2`, `TWH3` below) holds; `ipl` counts planes in loop order.

`Ubnd` and `cG` are not read by this method; presumably they are kept so that
all boundary-condition variants share one signature (TODO confirm with callers).
"""
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,NB,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    I = point_coord((b,r), lp)

    ipl = 0
    S   = zero(eltype(plx))
    @inbounds begin
        for id1 in N:-1:1
            bu1, ru1 = up((b, r), id1, lp)

            for id2 in 1:id1-1
                bu2, ru2 = up((b, r), id2, lp)
                ipl += 1

                # Twist conditions: the point sits on the (1,1) corner of the
                # plane, or on the neighbouring corner picked up by the staple.
                TWP  = (I[id1]==1) && (I[id2]==1)
                TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
                TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )

                # H2 staple: starts at the id1-neighbour (bu1, ru1), which is
                # reused here instead of calling `up` again.
                (b11, r11) = up((bu1,ru1), id1, lp)
                (b12, r12) = up((bu1,ru1), id2, lp)
                h2 = (U[bu1,id1,ru1]*U[b11,id2,r11])/U[b12,id1,r12]

                # H3 staple: starts at the id2-neighbour (bu2, ru2).
                (b22, r22) = up((bu2,ru2), id2, lp)
                (b21, r21) = up((bu2,ru2), id1, lp)
                h3 = (U[bu2,id2,ru2]*U[b22,id1,r22])/U[b21,id2,r21]
                # END staples

                ga   = U[bu1,id2,ru1]
                g2   = U[b,id2,r]\U[b,id1,r]
                Uu2  = U[bu2,id1,ru2]
                g2ga = g2*ga

                # Traces are computed once; only the weight depends on the twist.
                # ztw[ipl] is read only when the matching condition holds.
                trp = tr(g2ga/Uu2)
                if TWP
                    S += (ztw[ipl]*c0)*trp
                else
                    S += c0*trp
                end

                trh2 = tr(g2*h2/Uu2)
                if TWH2
                    S += (ztw[ipl]*c1)*trh2
                else
                    S += c1*trh2
                end

                trh3 = tr(g2ga/h3)
                if TWH3
                    S += (ztw[ipl]*c1)*trh3
                else
                    S += c1*trh3
                end
            end
        end

        plx[I] = S
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_plaq!(plx, U, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D})

CUDA kernel, periodic boundary conditions. For the lattice point handled by this
thread it sums the trace of the plaquette over all planes `(id1, id2)` with
`id2 < id1` and stores the result in `plx[I]`, `I = point_coord((b, r), lp)`.

A plaquette whose point has coordinate 1 in both directions of the plane is
multiplied by `ztw[ipl]`, where `ipl` counts planes in loop order.

`Ubnd` and `cG` are not read by this method; presumably they are kept for
signature compatibility with the other boundary-condition variants (TODO confirm).
"""
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        I = point_coord((b,r), lp)

        S   = zero(eltype(plx))
        ipl = 0
        for id1 in N:-1:1
            bu1, ru1 = up((b, r), id1, lp)

            for id2 in 1:id1-1
                bu2, ru2 = up((b, r), id2, lp)
                ipl += 1

                # Trace of the plaquette in the (id1, id2) plane, computed once.
                trp = tr(U[b,id1,r]*U[bu1,id2,ru1] / (U[b,id2,r]*U[bu2,id1,ru2]))

                # Twisted plaquette: the point is at the (1,1) corner of the plane.
                if (I[id1]==1) && (I[id2]==1)
                    S += ztw[ipl]*trp
                else
                    S += trp
                end
            end
        end

        plx[I] = S
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_force_wilson_pln!(frc1, frc2, U, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D})

CUDA kernel, periodic boundary conditions. For the plane
`(id1, id2) = lp.plidx[ipl]` and the lattice point handled by this thread, it
applies `projalg` to the plaquette-type products and accumulates the result:

- `frc1[b, id1, r]` and `frc1[b, id2, r]` (links at the point itself) receive `-X` / `+X`;
- `frc2[bu1, id2, ru1]` and `frc2[bu2, id1, ru2]` (links at the two `up`
  neighbours) receive the contributions of the remaining two links.

When the point has coordinate 1 in both directions of the plane, `ztw` is passed
to `projalg` as its first argument; otherwise the one-argument form is used.

`Ubnd` and `cG` are not read by this method (presumably kept for a uniform
signature across boundary conditions; TODO confirm).
"""
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    I = point_coord((b,r), lp)

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        bu1, ru1 = up((b, r), id1, lp)
        bu2, ru2 = up((b, r), id2, lp)
        TWP = (I[id1]==1)&&(I[id2]==1)

        g1 = U[bu1,id2,ru1]/U[bu2,id1,ru2]
        g2 = U[b,id2,r]\U[b,id1,r]

        # The matrix products do not depend on the twist; build them once.
        plq = U[b,id1,r]*g1/U[b,id2,r]
        g12 = g1*g2
        g21 = g2*g1

        if TWP
            X = projalg(ztw,plq)
            frc2[bu1,id2,ru1] -= projalg(ztw,g12)
            frc2[bu2,id1,ru2] += projalg(ztw,g21)
        else
            X = projalg(plq)
            frc2[bu1,id2,ru1] -= projalg(g12)
            frc2[bu2,id1,ru2] += projalg(g21)
        end
        frc1[b,id1,r] -= X
        frc1[b,id2,r] += X
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_force_impr_pln!(frc1, frc2, U, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D})

CUDA kernel, periodic boundary conditions. For the plane
`(id1, id2) = lp.plidx[ipl]` and the lattice point handled by this thread, it
accumulates into `frc1` (links at the point itself) and `frc2` (links at the
`up` neighbours in `id1` and `id2`):

- the plaquette contribution, weighted by `c0`;
- the contributions built from the four staples `h1`..`h4`, weighted by `c1`.

Each contribution has its own twist condition (`TWP`, `TWH1`..`TWH4`). When it
holds, the weight times `ztw` is passed to `projalg` as first argument;
otherwise the one-argument `projalg` result is multiplied by the weight.

`Ubnd` and `cG` are not read by this method (presumably kept for a uniform
signature across boundary conditions; TODO confirm).
"""
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    I = point_coord((b,r), lp)

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        bu1, ru1 = up((b, r), id1, lp)
        bu2, ru2 = up((b, r), id2, lp)

        TWP  = (I[id1]==1) && (I[id2]==1)
        TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
        TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
        TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
        TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )

        # Links used repeatedly below; loaded once.
        U1  = U[b,id1,r]
        U2  = U[b,id2,r]
        Uu2 = U[bu2,id1,ru2]

        # H1 staple: starts at the `dw` neighbour in id2.
        (bd2, rd2) = dw((b,r), id2, lp)
        (bdu, rdu) = up((bd2,rd2), id1, lp)
        h1 = (U[bd2,id2,rd2]\U[bd2,id1,rd2])*U[bdu,id2,rdu]

        # H2 staple: starts at the id1-neighbour (bu1, ru1), reused here.
        (b11, r11) = up((bu1,ru1), id1, lp)
        (b12, r12) = up((bu1,ru1), id2, lp)
        h2 = (U[bu1,id1,ru1]*U[b11,id2,r11])/U[b12,id1,r12]

        # H3 staple: starts at the id2-neighbour (bu2, ru2), reused here.
        (b22, r22) = up((bu2,ru2), id2, lp)
        (b21, r21) = up((bu2,ru2), id1, lp)
        h3 = (U[bu2,id2,ru2]*U[b22,id1,r22])/U[b21,id2,r21]

        # H4 staple: starts at the `dw` neighbour in id1.
        (bd1, rd1) = dw((b,r), id1, lp)
        (bd1u, rd1u) = up((bd1,rd1), id2, lp)
        h4 = (U[bd1,id1,rd1]\U[bd1,id2,rd1])*U[bd1u,id1,rd1u]
        # END staples

        ga = U[bu1,id2,ru1]
        g1 = ga/Uu2
        g2 = U2\U1

        # Plaquette contribution (weight c0).
        plq = U1*g1/U2
        g12 = g1*g2
        g21 = g2*g1
        if TWP
            X = projalg(c0*ztw,plq)
            frc2[bu1,id2,ru1] -= projalg(c0*ztw,g12)
            frc2[bu2,id1,ru2] += projalg(c0*ztw,g21)
        else
            X = c0*projalg(plq)
            frc2[bu1,id2,ru1] -= c0*projalg(g12)
            frc2[bu2,id1,ru2] += c0*projalg(g21)
        end

        # H1 contribution (weight c1).
        h1a = h1*g1/U2
        h1b = (g1/U2)*h1
        h1c = (U2\h1)*g1
        if TWH1
            frc1[b,id2,r]     += projalg(ztw*c1,h1a)
            frc2[bu1,id2,ru1] -= projalg(ztw*c1,h1b)
            frc2[bu2,id1,ru2] += projalg(ztw*c1,h1c)
        else
            frc1[b,id2,r]     += c1*projalg(h1a)
            frc2[bu1,id2,ru1] -= c1*projalg(h1b)
            frc2[bu2,id1,ru2] += c1*projalg(h1c)
        end

        # H2 contribution (weight c1).
        h2a = U1*h2/(U2*Uu2)
        h2c = g2*h2/Uu2
        if TWH2
            X += projalg(ztw*c1,h2a)
            frc2[bu2,id1,ru2] += projalg(ztw*c1,h2c)
        else
            X += c1*projalg(h2a)
            frc2[bu2,id1,ru2] += c1*projalg(h2c)
        end

        # H3 contribution (weight c1).
        h3a = U1*ga/(U2*h3)
        h3b = (ga/h3)*g2
        if TWH3
            X += projalg(ztw*c1,h3a)
            frc2[bu1,id2,ru1] -= projalg(ztw*c1,h3b)
        else
            X += c1*projalg(h3a)
            frc2[bu1,id2,ru1] -= c1*projalg(h3b)
        end

        # H4 contribution (weight c1).
        h4a = U1*g1/h4
        h4b = (g1/h4)*U1
        h4c = h4\U1*g1
        if TWH4
            frc1[b,id1,r]     -= projalg(ztw*c1,h4a)
            frc2[bu1,id2,ru1] -= projalg(ztw*c1,h4b)
            frc2[bu2,id1,ru2] += projalg(ztw*c1,h4c)
        else
            frc1[b,id1,r]     -= c1*projalg(h4a)
            frc2[bu1,id2,ru1] -= c1*projalg(h4b)
            frc2[bu2,id1,ru2] += c1*projalg(h4c)
        end

        # X collects the plaquette, H2 and H3 parts; it enters the two links
        # at the point with opposite signs.
        frc1[b,id1,r] -= X
        frc1[b,id2,r] += X
    end

    return nothing
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
function force_gauge(ymws::YMworkspace, U, gp::GaugeParm, lp::SpaceParm)
|
function force_gauge(ymws::YMworkspace, U, gp::GaugeParm, lp::SpaceParm)
|
||||||
|
|
||||||
|
@ -388,4 +949,3 @@ function force_pln!(frc1, ftmp, U, Ubnd, cG, ztw, lp::SpaceParm, c0=1)
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -54,27 +54,40 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
# CUDA kernel, open boundary conditions: for the (thread, block) = (b, r)
# position, build one SU3alg element per direction from the eight entries
# m[b,id,1:8,r] and store it in frc[b,id,r].
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    dir = 1
    @inbounds while dir <= lp.ndim
        frc[b,dir,r] = SU3alg(m[b,dir,1,r], m[b,dir,2,r], m[b,dir,3,r],
                              m[b,dir,4,r], m[b,dir,5,r], m[b,dir,6,r],
                              m[b,dir,7,r], m[b,dir,8,r])
        dir += 1
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
it = point_time((b,r), lp)
|
it = point_time((b,r), lp)
|
||||||
|
|
||||||
if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI))
|
if it == 1
|
||||||
if it == 1
|
for id in 1:lp.ndim-1
|
||||||
for id in 1:lp.ndim-1
|
frc[b,id,r] = zero(T)
|
||||||
frc[b,id,r] = zero(T)
|
end
|
||||||
end
|
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
||||||
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
||||||
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
m[b,N,7,r], m[b,N,8,r])
|
||||||
m[b,N,7,r], m[b,N,8,r])
|
else
|
||||||
else
|
for id in 1:lp.ndim
|
||||||
for id in 1:lp.ndim
|
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||||
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||||
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
m[b,id,7,r], m[b,id,8,r])
|
||||||
m[b,id,7,r], m[b,id,8,r])
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -135,6 +135,7 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
||||||
it = point_time((b, r), lp)
|
it = point_time((b, r), lp)
|
||||||
|
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||||
|
OBC = (B == BC_OPEN)
|
||||||
|
|
||||||
@inbounds for id in 1:N
|
@inbounds for id in 1:N
|
||||||
bu, ru = up((b,r), id, lp)
|
bu, ru = up((b,r), id, lp)
|
||||||
|
@ -152,13 +153,21 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
||||||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
if OBC
|
||||||
|
if (it > 1) && (it < lp.iL[end])
|
||||||
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
|
elseif ((it == lp.iL[end]) || (it == 1)) && (id < N)
|
||||||
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
|
end
|
||||||
else
|
else
|
||||||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -265,6 +274,7 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
|
||||||
|
|
||||||
ztw = ztwist(gp, lp)
|
ztw = ztwist(gp, lp)
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||||
|
OBC = (B == BC_OPEN)
|
||||||
|
|
||||||
tp = ntuple(i->i, N-1)
|
tp = ntuple(i->i, N-1)
|
||||||
V3 = prod(lp.iL[1:end-1])
|
V3 = prod(lp.iL[1:end-1])
|
||||||
|
@ -285,6 +295,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
|
||||||
if !SFBC
|
if !SFBC
|
||||||
Eslc[1,ipl] = Etmp[1] + Etmp[end]
|
Eslc[1,ipl] = Etmp[1] + Etmp[end]
|
||||||
end
|
end
|
||||||
|
if OBC ## Check normalization of timelike boundary plaquettes
|
||||||
|
Eslc[end,ipl] = Etmp[end-1]
|
||||||
|
Eslc[1,ipl] = Etmp[1]
|
||||||
|
end
|
||||||
else
|
else
|
||||||
for it in 1:lp.iL[end]
|
for it in 1:lp.iL[end]
|
||||||
Eslc[it,ipl] = 2*Etmp[it]
|
Eslc[it,ipl] = 2*Etmp[it]
|
||||||
|
@ -327,7 +341,6 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{
|
||||||
plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
|
plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -350,21 +363,18 @@ function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace)
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
||||||
end
|
end
|
||||||
|
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp)
|
||||||
end
|
end
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp)
|
||||||
end
|
end
|
||||||
|
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp)
|
||||||
end
|
end
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
||||||
end
|
end
|
||||||
|
|
||||||
Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2)
|
Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -474,6 +484,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
#First plane
|
#First plane
|
||||||
id1, id2 = lp.plidx[ipl1]
|
id1, id2 = lp.plidx[ipl1]
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||||
|
OBC = ((B == BC_OPEN) && (id1 == 4))
|
||||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||||
|
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
|
@ -493,6 +504,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
frc1[bu1,2,ru1] = zero(TA)
|
frc1[bu1,2,ru1] = zero(TA)
|
||||||
frc1[bd,3,rd] = zero(TA)
|
frc1[bd,3,rd] = zero(TA)
|
||||||
frc1[bu2,4,ru2] = projalg(l2*l1)
|
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||||
|
elseif OBC && (it == lp.iL[end])
|
||||||
|
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
frc1[bu1,2,ru1] = zero(TA)
|
||||||
|
frc1[bd,3,rd] = zero(TA)
|
||||||
|
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||||
else
|
else
|
||||||
if TWP
|
if TWP
|
||||||
frc1[b,1,r] = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r])
|
frc1[b,1,r] = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
@ -510,6 +526,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
# Second plane
|
# Second plane
|
||||||
id1, id2 = lp.plidx[ipl2]
|
id1, id2 = lp.plidx[ipl2]
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||||
|
OBC = ((B == BC_OPEN) && (id1 == 4))
|
||||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||||
|
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
|
@ -529,6 +546,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
frc2[bu1,2,ru1] = zero(TA)
|
frc2[bu1,2,ru1] = zero(TA)
|
||||||
frc2[bd,3,rd] = zero(TA)
|
frc2[bd,3,rd] = zero(TA)
|
||||||
frc2[bu2,4,ru2] = projalg(l2*l1)
|
frc2[bu2,4,ru2] = projalg(l2*l1)
|
||||||
|
elseif OBC && (it == lp.iL[end])
|
||||||
|
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
frc1[bu1,2,ru1] = zero(TA)
|
||||||
|
frc1[bd,3,rd] = zero(TA)
|
||||||
|
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||||
else
|
else
|
||||||
if TWP
|
if TWP
|
||||||
frc2[b,1,r] = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r])
|
frc2[b,1,r] = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
@ -543,7 +565,5 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue