mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 11:13:42 +02:00
OBC Branch Merge
This commit is contained in:
parent
d026a17b44
commit
bc06079664
7 changed files with 1726 additions and 808 deletions
|
@ -105,500 +105,6 @@ struct DiracWorkspace{T}
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
export DiracWorkspace, DiracParam
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Csw!(dws, U, gp, lp::SpaceParm)
|
|
||||||
|
|
||||||
Computes the clover and stores it in dws.csw.
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Fill `dws.csw` by launching `krnl_csw!` once for every plane index in
    # `1:lp.npls`. Each launch is synchronized before the next one is issued.
    @timeit "Csw computation" begin
        nplanes = Int(lp.npls)
        for ipl in 1:nplanes
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(
                dws.csw, U, gp.Ubnd, ipl, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # Writes 0.125 times the sum of the anti-symmetrized four plaquette
    # products around the site, in plane `ipl`, to `csw[b,ipl,r]`.
    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        site = (b, r)
        it = point_coord(site, lp)[4]

        id1, id2 = lp.plidx[ipl]
        # The SF special cases only apply to planes whose first direction is 4.
        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)

        # Neighbours of the site in the two directions of the plane.
        bu1, ru1 = up(site, id1, lp)
        bu2, ru2 = up(site, id2, lp)
        bd1, rd1 = dw(site, id1, lp)
        bd2, rd2 = dw(site, id2, lp)
        bdd, rdd = dw((bd1, rd1), id2, lp)
        bud, rud = dw((bu1, ru1), id2, lp)
        bdu, rdu = up((bd1, rd1), id2, lp)

        # On the last time slice with SF boundaries, the links at the `up`
        # neighbour in direction id1 are replaced by the field `Ubnd`.
        use_bnd = SFBC && (it == lp.iL[end])
        gt1 = use_bnd ? Ubnd[id2] : U[bu1,id2,ru1]
        gt2 = use_bnd ? Ubnd[id2] : U[bud,id2,rud]

        leaf1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
        leaf2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
        leaf3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
        leaf4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]

        # With SF boundaries nothing is stored on the first time slice.
        if !SFBC || (it != 1)
            csw[b,ipl,r] = 0.125*(antsym(leaf1)+antsym(leaf2)+antsym(leaf3)+antsym(leaf4))
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Computes the Dirac operator (with the Wilson term) ``D_w`` with gauge field `U` and parameters `dpar` of the field `si` and stores it in `so`.
|
|
||||||
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    # The clover-improved kernel (which reads `dws.csw`) is launched only when
    # |dpar.csw| exceeds 1.0E-10; otherwise the plain Wilson kernel is used.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "Dw" begin
        if improved
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(
                so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(
                so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # so[b,r] = site-diagonal terms (mass, twisted mass, clover) minus half the
    # sum of the eight nearest-neighbour hopping terms.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        massive = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
        clover = 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) +
                              Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) +
                              Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                              Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) +
                              Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) +
                              Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )
        so[b,r] = massive + clover

        # Forward (fw) and backward (bw) hopping terms, one pair per direction.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # so[b,r] = mass and twisted-mass terms minus half the sum of the eight
    # nearest-neighbour hopping terms.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])

        # Forward (fw) and backward (bw) hopping terms, one pair per direction.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # SF variant: the kernels additionally receive `dpar.ct`.
    # The clover-improved kernel (which reads `dws.csw`) is launched only when
    # |dpar.csw| exceeds 1.0E-10.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "Dw" begin
        if improved
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(
                so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(
                so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # The field si is assumed to be zero at t = 0.
    # Sites on the first time slice are left untouched.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    it = point_time(site, lp)
    it == 1 && return nothing

    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        massive = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
        clover = 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) +
                              Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) +
                              Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                              Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) +
                              Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) +
                              Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )
        so[b,r] = massive + clover

        # Forward (fw) and backward (bw) hopping terms, one pair per direction.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        # Extra (ct - 1) diagonal term on time slices 2 and lp.iL[4].
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # The field si is assumed to be zero at t = 0.
    # Sites on the first time slice are left untouched.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    it = point_time(site, lp)
    it == 1 && return nothing

    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])

        # Forward (fw) and backward (bw) hopping terms, one pair per direction.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        # Extra (ct - 1) diagonal term on time slices 2 and lp.iL[4].
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Computes ``\\gamma_5`` times the Dirac operator (with the Wilson term) with gauge field `U` and parameters `dpar` of the field `si` and stores it in `so`.
|
|
||||||
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
|
||||||
"""
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    # The clover-improved kernel (which reads `dws.csw`) is launched only when
    # |dpar.csw| exceeds 1.0E-10; otherwise the plain kernel is used.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "g5Dw" begin
        if improved
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(
                so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(
                so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # Builds the mass + clover + hopping part in so[b,r], then multiplies it by
    # Gamma{5} and adds the twisted-mass term im*tm*si[b,r].
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        clover = 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) +
                              Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) +
                              Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                              Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) +
                              Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) +
                              Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )
        so[b,r] = (4+m0)*si[b,r] + clover

        # Forward (fw) and backward (bw) hopping terms, one pair per direction.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # Builds the mass + hopping part in so[b,r], then multiplies it by
    # Gamma{5} and adds the twisted-mass term im*tm*si[b,r].
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        so[b,r] = (4+m0)*si[b,r]

        # Forward (fw) and backward (bw) hopping terms, one pair per direction.
        fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
        bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
        fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
        bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
        fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
        bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
        fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
        bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

        so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

        so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
    end

    return nothing
end
|
|
||||||
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # SF variant: the kernels additionally receive `dpar.ct`.
    # The clover-improved kernel (which reads `dws.csw`) is launched only when
    # |dpar.csw| exceeds 1.0E-10.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "g5Dw" begin
        if improved
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(
                so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(
                so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # The field si is assumed to be zero at t = 0.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    it = point_time(site, lp)

    # The mass + clover + hopping part is only built away from the first slice.
    if it != 1
        bu1, ru1 = up(site, 1, lp)
        bd1, rd1 = dw(site, 1, lp)
        bu2, ru2 = up(site, 2, lp)
        bd2, rd2 = dw(site, 2, lp)
        bu3, ru3 = up(site, 3, lp)
        bd3, rd3 = dw(site, 3, lp)
        bu4, ru4 = up(site, 4, lp)
        bd4, rd4 = dw(site, 4, lp)

        @inbounds begin
            clover = 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) +
                                  Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) +
                                  Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                  Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) +
                                  Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) +
                                  Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]) )
            so[b,r] = (4+m0)*si[b,r] + clover

            # Forward (fw) and backward (bw) hopping terms per direction.
            fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
            bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
            fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
            bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
            fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
            bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
            fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
            bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

            so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

            # Extra (ct - 1) diagonal term on time slices 2 and lp.iL[4].
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this also runs on the first time slice, where so[b,r] was
    # not written above, so it acts on whatever `so` held on entry. Confirm
    # that callers always reset that slice afterwards.
    so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # The field si is assumed to be zero at t = 0.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    it = point_time(site, lp)

    # The mass + hopping part is only built away from the first time slice.
    if it != 1
        bu1, ru1 = up(site, 1, lp)
        bd1, rd1 = dw(site, 1, lp)
        bu2, ru2 = up(site, 2, lp)
        bd2, rd2 = dw(site, 2, lp)
        bu3, ru3 = up(site, 3, lp)
        bd3, rd3 = dw(site, 3, lp)
        bu4, ru4 = up(site, 4, lp)
        bd4, rd4 = dw(site, 4, lp)

        @inbounds begin
            so[b,r] = (4+m0)*si[b,r]

            # Forward (fw) and backward (bw) hopping terms per direction.
            fw1 = th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1])
            bw1 = conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1])
            fw2 = th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2])
            bw2 = conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2])
            fw3 = th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3])
            bw3 = conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3])
            fw4 = th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4])
            bw4 = conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4])

            so[b,r] -= 0.5*(fw1 + bw1 + fw2 + bw2 + fw3 + bw3 + fw4 + bw4)

            # Extra (ct - 1) diagonal term on time slices 2 and lp.iL[4].
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this also runs on the first time slice, where so[b,r] was
    # not written above, so it acts on whatever `so` held on entry. Confirm
    # that callers always reset that slice afterwards.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Applies the operator ``\\gamma_5 D_w`` twice to `si` and stores the result in `so`. This is equivalent to applying the operator ``D_w^\\dagger D_w``.
|
|
||||||
The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
|
|
||||||
"""
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Two passes of the g5Dw kernel: si -> dws.st with +tm, then dws.st -> so
    # with -tm. `SF_bndfix!` is applied to the output of each pass.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin
        if improved
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(
                    dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(
                    so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        else
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(
                    dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(dws.st,lp)
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(
                    so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
            end
            SF_bndfix!(so,lp)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Two passes of the g5Dw kernel: si -> dws.st with +tm, then dws.st -> so
    # with -tm. The clover-improved kernel is used when |dpar.csw| > 1.0E-10.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin
        if improved
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(
                    dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
            end
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(
                    so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
            end
        else
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(
                    dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
            end
            @timeit "g5Dw" begin
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(
                    so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
            end
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
function mtwmdpar(dpar::DiracParam)
|
function mtwmdpar(dpar::DiracParam)
|
||||||
|
@ -610,108 +116,19 @@ function mtwmdpar(dpar::DiracParam{P,R}) where {P,R}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
"""
|
export DiracWorkspace, DiracParam, mtwmdpar
|
||||||
SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
|
|
||||||
|
|
||||||
Sets all the values of `sp` in the first time slice to zero.
|
include("Diracfields.jl")
|
||||||
"""
|
export SF_bndfix!, Csw!, pfrandomize!
|
||||||
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Launch `krnl_sfbndfix!` over all sites and wait for it to finish.
    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
    end
    return nothing
end
|
|
||||||
|
|
||||||
function krnl_sfbndfix!(sp,lp::SpaceParm)
    # Kernel: one thread per site (b = thread index, r = block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Only sites on the first time slice are modified; they are multiplied by 0.0.
    point_time((b,r),lp) == 1 || return nothing
    sp[b,r] = 0.0*sp[b,r]

    return nothing
end
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
|
|
||||||
|
|
||||||
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
|
||||||
"""
|
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T}
    @timeit "Randomize pseudofermion field" begin
        # Four real normal arrays, one per spinor component; the trailing
        # extent 2 holds real and imaginary parts (complex generation was not
        # supported for Julia 1.5.4).
        draws = ntuple(_ -> CUDA.randn(T, lp.bsz, 3, lp.rsz, 2), 4)
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f, draws, lp, t)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
    # Kernel: one thread per site (b = thread index, r = block index).
    # With t == 0 every site is assigned; otherwise only sites whose
    # point_time equals t.
    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        if (t == 0) || (point_time((b,r),lp) == t)
            # One SU3fund per array in `p`, built from its (real, imag) planes.
            f[b,r] = Spinor(map(p) do x
                SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
                        x[b,2,r,1] + im* x[b,2,r,2],
                        x[b,3,r,1] + im* x[b,3,r,2])
            end)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}},lp::SpaceParm, t::Int64=0) where {T}
    @timeit "Randomize pseudofermion field" begin
        # Four real normal arrays, one per spinor component; the trailing
        # extent 2 holds real and imaginary parts (complex generation was not
        # supported for Julia 1.5.4).
        draws = ntuple(_ -> CUDA.randn(T, lp.bsz, 2, lp.rsz, 2), 4)
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f, draws, lp, t)
    end

    return nothing
end
|
|
||||||
|
|
||||||
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
    # Kernel: one thread per site (b = thread index, r = block index).
    # With t == 0 every site is assigned; otherwise only sites whose
    # point_time equals t.
    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        if (t == 0) || (point_time((b,r),lp) == t)
            # One SU2fund per array in `p`, built from its (real, imag) planes.
            f[b,r] = Spinor(map(p) do x
                SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
                        x[b,2,r,1] + im* x[b,2,r,2])
            end)
        end
    end

    return nothing
end
|
|
||||||
|
|
||||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
|
|
||||||
|
|
||||||
|
include("Diracoper.jl")
|
||||||
|
export Dw!, g5Dw!, DwdagDw!
|
||||||
|
|
||||||
include("DiracIO.jl")
|
include("DiracIO.jl")
|
||||||
export read_prop, save_prop, read_dpar
|
export read_prop, save_prop, read_dpar
|
||||||
|
|
||||||
include("Diracflow.jl")
|
include("Diracflow.jl")
|
||||||
export Dslash_sq!, flw, backflow
|
export Nablanabla!, Dslash_sq!, flw, backflow
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
211
src/Dirac/Diracfields.jl
Normal file
211
src/Dirac/Diracfields.jl
Normal file
|
@ -0,0 +1,211 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Csw!(dws, U, gp, lp::SpaceParm)
|
||||||
|
|
||||||
|
Computes the clover and stores it in dws.csw.
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Fill `dws.csw` by launching `krnl_csw!` once for every plane index in
    # `1:lp.npls`. Each launch is synchronized before the next one is issued.
    @timeit "Csw computation" begin
        nplanes = Int(lp.npls)
        for ipl in 1:nplanes
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(
                dws.csw, U, gp.Ubnd, ipl, lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
    # Kernel: one thread per site (b = thread index, r = block index).
    # Writes 0.125 times the sum of the anti-symmetrized four plaquette
    # products around the site, in plane `ipl`, to `csw[b,ipl,r]`.
    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        site = (b, r)
        it = point_coord(site, lp)[4]

        id1, id2 = lp.plidx[ipl]
        # The SF special cases only apply to planes whose first direction is 4.
        SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
        # With open boundaries, the first and last time slices are skipped.
        OBC = (B == BC_OPEN) && ((it == 1) || (it == lp.iL[end]))

        # Neighbours of the site in the two directions of the plane.
        bu1, ru1 = up(site, id1, lp)
        bu2, ru2 = up(site, id2, lp)
        bd1, rd1 = dw(site, id1, lp)
        bd2, rd2 = dw(site, id2, lp)
        bdd, rdd = dw((bd1, rd1), id2, lp)
        bud, rud = dw((bu1, ru1), id2, lp)
        bdu, rdu = up((bd1, rd1), id2, lp)

        # On the last time slice with SF boundaries, the links at the `up`
        # neighbour in direction id1 are replaced by the field `Ubnd`.
        use_bnd = SFBC && (it == lp.iL[end])
        gt1 = use_bnd ? Ubnd[id2] : U[bu1,id2,ru1]
        gt2 = use_bnd ? Ubnd[id2] : U[bud,id2,rud]

        leaf1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
        leaf2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
        leaf3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
        leaf4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]

        # Nothing is stored on the SF first time slice or on open boundaries.
        skip = (SFBC && (it == 1)) || OBC
        if !skip
            csw[b,ipl,r] = 0.125*(antsym(leaf1)+antsym(leaf2)+antsym(leaf3)+antsym(leaf4))
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
|
||||||
|
|
||||||
|
Sets all the values of `sp` in the first time slice to zero.
|
||||||
|
"""
|
||||||
|
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # Launch `krnl_sfbndfix!` over all sites and wait for it to finish.
    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
    end
    return nothing
end
|
||||||
|
|
||||||
|
function krnl_sfbndfix!(sp,lp::SpaceParm)
    # Kernel: one thread per site (b = thread index, r = block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Only sites on the first time slice are modified; they are multiplied by 0.0.
    point_time((b,r),lp) == 1 || return nothing
    sp[b,r] = 0.0*sp[b,r]

    return nothing
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D})
|
||||||
|
|
||||||
|
Sets all the values of `sp` in the first and last time slice to zero.
|
||||||
|
"""
|
||||||
|
function SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # Open-boundary variant: launch `krnl_opbndfix!` over all sites and wait
    # for it to finish. The timer label is shared with the SF variant.
    @timeit "SF boundary fix" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_opbndfix!(sp, lp)
    end
    return nothing
end
|
||||||
|
|
||||||
|
function krnl_opbndfix!(sp,lp::SpaceParm)
    # Kernel: one thread per site (b = thread index, r = block index).
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Sites on the first and on the last time slice are multiplied by 0.0.
    it = point_time((b,r),lp)
    on_boundary = (it == 1) || (it == lp.iL[end])
    if on_boundary
        sp[b,r] = 0.0*sp[b,r]
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
|
||||||
|
|
||||||
|
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
||||||
|
"""
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
    @timeit "Randomize pseudofermion field" begin
        # Real and imaginary parts are drawn as separate real arrays (complex
        # generation was not supported for Julia 1.5.4) and scaled by 1/sqrt(2).
        # The scale is computed in precision T: dividing by the Float64
        # `sqrt(2)` would promote Float32 draws to Float64.
        scale = sqrt(T(2))
        p = ntuple(i -> CUDA.randn(T, lp.bsz, 3, lp.rsz, 2) ./ scale, 4)
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
    @timeit "Randomize pseudofermion field" begin
        # Real and imaginary parts are drawn as separate real arrays (complex
        # generation was not supported for Julia 1.5.4) and scaled by 1/sqrt(2).
        # The scale is computed in precision T: dividing by the Float64
        # `sqrt(2)` would promote Float32 draws to Float64.
        scale = sqrt(T(2))
        p = ntuple(i -> CUDA.randn(T, lp.bsz, 3, lp.rsz, 2) ./ scale, 4)
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
        end
    end
    # Apply the boundary fix for this boundary-condition type to the new field.
    SF_bndfix!(f,lp)

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
    # Kernel: one thread per site (b = thread index, r = block index).
    # With t == 0 every site is assigned; otherwise only sites whose
    # point_time equals t.
    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        if (t == 0) || (point_time((b,r),lp) == t)
            # One SU3fund per array in `p`, built from its (real, imag) planes.
            f[b,r] = Spinor(map(p) do x
                SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
                        x[b,2,r,1] + im* x[b,2,r,2],
                        x[b,3,r,1] + im* x[b,3,r,2])
            end)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
    @timeit "Randomize pseudofermion field" begin
        # Real and imaginary parts are drawn as separate real arrays (complex
        # generation was not supported for Julia 1.5.4) and scaled by 1/sqrt(2).
        # The colour extent is 2: `krnl_assign_pf_su2!` reads only components
        # 1 and 2, so a third component would be generated and never used.
        # The scale is computed in precision T: dividing by the Float64
        # `sqrt(2)` would promote Float32 draws to Float64.
        scale = sqrt(T(2))
        p = ntuple(i -> CUDA.randn(T, lp.bsz, 2, lp.rsz, 2) ./ scale, 4)
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
    # Fills the SU2 pseudofermion field `f` with Gaussian noise by launching
    # krnl_assign_pf_su2!, then applies SF_bndfix! to `f`.
    # `t` is forwarded to the kernel: with t == 0 every site is written,
    # otherwise only sites whose point_time equals `t`.

    @timeit "Randomize pseudofermion field" begin
        # Four real arrays of size (bsz, 3, rsz, 2); the last index holds the
        # real and imaginary parts (complex generation is not supported for
        # Julia 1.5.4).
        # NOTE(review): krnl_assign_pf_su2! only reads colour indices 1 and 2,
        # so the third row is unused; it is kept so the random stream is
        # unchanged.
        # The divisor is sqrt(T(2)) rather than sqrt(2): a Float64 divisor
        # would promote Float32 samples to Float64, so the noise would no
        # longer match the precision T of `f`.
        p = ntuple(_ -> CUDA.randn(T, lp.bsz, 3, lp.rsz, 2), 4) ./ sqrt(T(2))
        CUDA.@sync begin
            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f, p, lp, t)
        end
    end

    # Boundary fix-up of the freshly generated field
    # (presumably resets the boundary time slices; confirm in SF_bndfix!).
    SF_bndfix!(f, lp)

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
    # CUDA kernel: builds the spinor at index (b,r) from the real arrays in `p`.
    # For every array q in `p`, q[b,c,r,1] + im*q[b,c,r,2] becomes colour
    # component c = 1,2 of one SU2fund entry of the spinor.
    # With t == 0 every site is written; otherwise only sites whose
    # point_time equals `t` are touched.

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)

        # Short-circuit keeps point_time from being evaluated when t == 0.
        if (t == 0) || (point_time((b,r),lp) == t)
            f[b,r] = Spinor(map(q->SU2fund(q[b,1,r,1] + im* q[b,1,r,2],
                                           q[b,2,r,1] + im* q[b,2,r,2]),p))
        end
    end

    return nothing
end
|
|
@ -30,7 +30,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
|
||||||
|
|
||||||
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
||||||
U .= expm.(U, ymws.mom, 2*eps)
|
U .= expm.(U, ymws.mom, 2*eps)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam,
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
|
|
||||||
for j in dsave:-1:1
|
for j in dsave:-1:1
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
for k in 1:j-1
|
for k in 1:j-1
|
||||||
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
||||||
end
|
end
|
||||||
|
@ -154,83 +154,6 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
|
||||||
|
|
||||||
Computes ``\\nabla^* \\nabla`` `si` and stores it in `so`.
|
|
||||||
|
|
||||||
"""
|
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
    # Launches krnl_Nablanabla with lp.bsz threads in each of lp.rsz blocks;
    # the kernel reads `si` and writes the result to `so`.
    # `dws` is not used by this method.
    @timeit "Laplacian" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
    end

    return nothing
end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
    # CUDA kernel: so[b,r] = -4*si[b,r] + 0.5 * (sum over the four directions
    # of the forward and backward transported neighbours of si, weighted by
    # th[mu] and conj(th[mu])).

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        so[b,r] = -4*si[b,r]

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bp1,rp1]) + conj(th[1]) * (U[bm1,1,rm1]\si[bm1,rm1]) +
                         th[2] * (U[b,2,r]*si[bp2,rp2]) + conj(th[2]) * (U[bm2,2,rm2]\si[bm2,rm2]) +
                         th[3] * (U[b,3,r]*si[bp3,rp3]) + conj(th[3]) * (U[bm3,3,rm3]\si[bm3,rm3]) +
                         th[4] * (U[b,4,r]*si[bp4,rp4]) + conj(th[4]) * (U[bm4,4,rm4]\si[bm4,rm4]) )
    end

    return nothing
end
|
|
||||||
|
|
||||||
|
|
||||||
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # CUDA kernel: same stencil as the generic krnl_Nablanabla, but sites with
    # point_time == 1 are left untouched.

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        # Nothing to do on the first time slice.
        if point_time((b,r),lp) == 1
            return nothing
        end

        so[b,r] = -4*si[b,r]

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bp1,rp1]) + conj(th[1]) * (U[bm1,1,rm1]\si[bm1,rm1]) +
                         th[2] * (U[b,2,r]*si[bp2,rp2]) + conj(th[2]) * (U[bm2,2,rm2]\si[bm2,rm2]) +
                         th[3] * (U[b,3,r]*si[bp3,rp3]) + conj(th[3]) * (U[bm3,3,rm3]\si[bm3,rm3]) +
                         th[4] * (U[b,4,r]*si[bp4,rp4]) + conj(th[4]) * (U[bm4,4,rm4]\si[bm4,rm4]) )
    end

    return nothing
end
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
|
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
|
||||||
|
|
||||||
|
@ -278,13 +201,123 @@ end
|
||||||
flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw_adapt(U, psi, int, tend, int.eps_ini, gp, dpar, lp, ymws, dws)
|
flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw_adapt(U, psi, int, tend, int.eps_ini, gp, dpar, lp, ymws, dws)
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||||
|
|
||||||
|
Computes ``\\nabla^* \\nabla`` `si` and stores it in `so`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
    # Periodic lattice: a single launch of krnl_Nablanabla (lp.bsz threads in
    # each of lp.rsz blocks) reads `si` and writes the result to `so`.
    # `dws` is not used by this method.
    @timeit "Laplacian" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
    end

    return nothing
end
|
||||||
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
    # SF / open boundaries: SF_bndfix! is applied to the input before the
    # kernel launch and to the output after it. Note that `si` is therefore
    # modified in place. `dws` is not used by this method.
    SF_bndfix!(si, lp)

    @timeit "Laplacian" begin
        CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
    end

    SF_bndfix!(so, lp)

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # CUDA kernel (open boundaries): so[b,r] = -4*si[b,r] + 0.5 * (sum over the
    # four directions of the forward and backward transported neighbours of
    # si). Sites on the first and last time slices are left untouched.

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        # Time coordinate of this site.
        it = point_time((b,r),lp)

        if (it != 1) && (it != lp.iL[end])

            so[b,r] = -4*si[b,r]

            # Forward (p) and backward (m) neighbours along the four directions.
            bp1, rp1 = up((b,r), 1, lp)
            bm1, rm1 = dw((b,r), 1, lp)
            bp2, rp2 = up((b,r), 2, lp)
            bm2, rm2 = dw((b,r), 2, lp)
            bp3, rp3 = up((b,r), 3, lp)
            bm3, rm3 = dw((b,r), 3, lp)
            bp4, rp4 = up((b,r), 4, lp)
            bm4, rm4 = dw((b,r), 4, lp)

            so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bp1,rp1]) + conj(th[1]) * (U[bm1,1,rm1]\si[bm1,rm1]) +
                             th[2] * (U[b,2,r]*si[bp2,rp2]) + conj(th[2]) * (U[bm2,2,rm2]\si[bm2,rm2]) +
                             th[3] * (U[b,3,r]*si[bp3,rp3]) + conj(th[3]) * (U[bm3,3,rm3]\si[bm3,rm3]) +
                             th[4] * (U[b,4,r]*si[bp4,rp4]) + conj(th[4]) * (U[bm4,4,rm4]\si[bm4,rm4]) )
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
    # CUDA kernel (periodic): so[b,r] = -4*si[b,r] + 0.5 * (sum over the four
    # directions of the forward and backward transported neighbours of si,
    # weighted by th[mu] and conj(th[mu])).

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        so[b,r] = -4*si[b,r]

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bp1,rp1]) + conj(th[1]) * (U[bm1,1,rm1]\si[bm1,rm1]) +
                         th[2] * (U[b,2,r]*si[bp2,rp2]) + conj(th[2]) * (U[bm2,2,rm2]\si[bm2,rm2]) +
                         th[3] * (U[b,3,r]*si[bp3,rp3]) + conj(th[3]) * (U[bm3,3,rm3]\si[bm3,rm3]) +
                         th[4] * (U[b,4,r]*si[bp4,rp4]) + conj(th[4]) * (U[bm4,4,rm4]\si[bm4,rm4]) )
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
    # CUDA kernel (SF boundaries): same stencil as the periodic
    # krnl_Nablanabla, but sites with point_time == 1 are left untouched.

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    @inbounds begin

        # Nothing to do on the first time slice.
        if point_time((b,r),lp) == 1
            return nothing
        end

        so[b,r] = -4*si[b,r]

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bp1,rp1]) + conj(th[1]) * (U[bm1,1,rm1]\si[bm1,rm1]) +
                         th[2] * (U[b,2,r]*si[bp2,rp2]) + conj(th[2]) * (U[bm2,2,rm2]\si[bm2,rm2]) +
                         th[3] * (U[b,3,r]*si[bp3,rp3]) + conj(th[3]) * (U[bm3,3,rm3]\si[bm3,rm3]) +
                         th[4] * (U[b,4,r]*si[bp4,rp4]) + conj(th[4]) * (U[bm4,4,rm4]\si[bm4,rm4]) )
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||||
|
|
||||||
Computes ``\\slashed{D}^2`` `si` and stores it in `so`.
|
Computes ``\\slashed{D}^2`` `si` and stores it in `so`.
|
||||||
|
@ -292,40 +325,40 @@ Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
|
||||||
"""
|
"""
|
||||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
@timeit "DwdagDw" begin
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -349,12 +382,12 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -369,19 +402,19 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
||||||
|
|
||||||
so[b,r] = 4*si[b,r]
|
so[b,r] = 4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
|
@ -393,11 +426,11 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -409,15 +442,15 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORB
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
if (point_time((b,r),lp) != 1)
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
664
src/Dirac/Diracoper.jl
Normal file
664
src/Dirac/Diracoper.jl
Normal file
|
@ -0,0 +1,664 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## OPEN
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||||
|
|
||||||
|
Computes the Dirac operator (with the Wilson term) ``D_w`` with gauge field U and parameters `dpar` of the field `si` and stores it in `so`.
|
||||||
|
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # Open boundaries: SF_bndfix! is applied to the input `si` (modified in
    # place) before the kernel launch and to the output `so` after it.
    SF_bndfix!(si, lp)

    @timeit "Dw" begin
        if abs(dpar.csw) > 1.0E-10
            # Non-zero csw: kernel that also adds the term stored in dws.csw.
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    SF_bndfix!(so, lp)

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # CUDA kernel (open boundaries, with the Fcsw term).
    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time coordinate of this site; the first and last time slices are skipped.
    it = point_time((b,r),lp)

    if (it != 1) && (it != lp.iL[end])

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        @inbounds begin

            # Site-diagonal part: (4+m0), the im*tm*Gamma{5} term and the Fcsw term.
            so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                                                 Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))

            # Nearest-neighbour part.
            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4]) )

            # Extra (ct-1) term on the time slices adjacent to the two boundaries.
            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # CUDA kernel (open boundaries, no Fcsw term).
    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time coordinate of this site; the first and last time slices are skipped.
    it = point_time((b,r),lp)

    if (it != 1) && (it != lp.iL[end])

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        @inbounds begin

            # Site-diagonal part: (4+m0) and the im*tm*Gamma{5} term.
            so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])

            # Nearest-neighbour part.
            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4]) )

            # Extra (ct-1) term on the time slices adjacent to the two boundaries.
            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||||
|
|
||||||
|
Computes ``\\gamma_5`` times the Dirac operator (with the Wilson term) with gauge field U and parameters `dpar` of the field `si` and stores it in `so`.
|
||||||
|
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
|
||||||
|
"""
|
||||||
|
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # Open boundaries: SF_bndfix! is applied to the input `si` (modified in
    # place) before the kernel launch and to the output `so` after it.
    SF_bndfix!(si, lp)

    @timeit "g5Dw" begin
        if abs(dpar.csw) > 1.0E-10
            # Non-zero csw: kernel that also adds the term stored in dws.csw.
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    SF_bndfix!(so, lp)

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # CUDA kernel (open boundaries, with the Fcsw term), followed by a
    # multiplication with Gamma{5} and the addition of im*tm*si.
    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time coordinate of this site; the first and last time slices are skipped.
    it = point_time((b,r),lp)

    if (it != 1) && (it != lp.iL[end])

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        @inbounds begin

            # Site-diagonal part: (4+m0) and the Fcsw term.
            so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                  Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))

            # Nearest-neighbour part.
            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4]) )

            # Extra (ct-1) term on the time slices adjacent to the two boundaries.
            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this statement runs for every site, including the two
    # boundary time slices where so[b,r] was not written above, so it acts on
    # whatever `so` held on entry there. The host wrappers call
    # SF_bndfix!(so, lp) afterwards; confirm that this makes it harmless.
    so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # CUDA kernel (open boundaries, no Fcsw term), followed by a
    # multiplication with Gamma{5} and the addition of im*tm*si.
    # The field si is assumed to be zero at t = 0,T
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Time coordinate of this site; the first and last time slices are skipped.
    it = point_time((b,r),lp)

    if (it != 1) && (it != lp.iL[end])

        # Forward (p) and backward (m) neighbours along the four directions.
        bp1, rp1 = up((b,r), 1, lp)
        bm1, rm1 = dw((b,r), 1, lp)
        bp2, rp2 = up((b,r), 2, lp)
        bm2, rm2 = dw((b,r), 2, lp)
        bp3, rp3 = up((b,r), 3, lp)
        bm3, rm3 = dw((b,r), 3, lp)
        bp4, rp4 = up((b,r), 4, lp)
        bm4, rm4 = dw((b,r), 4, lp)

        @inbounds begin

            # Site-diagonal part.
            so[b,r] = (4+m0)*si[b,r]

            # Nearest-neighbour part.
            so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                            th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                            th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                            th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4]) )

            # Extra (ct-1) term on the time slices adjacent to the two boundaries.
            if (it == 2) || (it == (lp.iL[4]-1))
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this statement runs for every site, including the two
    # boundary time slices where so[b,r] was not written above, so it acts on
    # whatever `so` held on entry there. The host wrappers call
    # SF_bndfix!(so, lp) afterwards; confirm that this makes it harmless.
    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]

    return nothing
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||||
|
|
||||||
|
Applies the operator ``\\gamma_5 D_w`` twice to `si` and stores the result in `so`. This is equivalent to applying the operator ``D_w^\\dagger D_w``.
|
||||||
|
The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
|
||||||
|
"""
|
||||||
|
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
    # Open boundaries. Two g5Dw kernel launches: si -> dws.st with +dpar.tm,
    # then dws.st -> so with -dpar.tm. SF_bndfix! is applied to `si` before the
    # first launch (so `si` is modified in place), to the intermediate field
    # dws.st, and to the result `so`.
    SF_bndfix!(si, lp)

    # Non-zero csw selects the kernels that also add the term stored in dws.csw.
    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        @timeit "g5Dw" begin
            if improved
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
        end

        SF_bndfix!(dws.st, lp)

        @timeit "g5Dw" begin
            if improved
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
            end
        end

        SF_bndfix!(so, lp)
    end

    return nothing
end
|
||||||
|
|
||||||
|
## PERIODIC
|
||||||
|
|
||||||
|
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
    # Periodic lattice: one kernel launch (lp.bsz threads in each of lp.rsz
    # blocks) reads `si` and writes the result to `so`.
    @timeit "Dw" begin
        if abs(dpar.csw) > 1.0E-10
            # Non-zero csw: kernel that also adds the term stored in dws.csw.
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
    # CUDA kernel (periodic, with the Fcsw term): every site is updated.
    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)

    # Forward (p) and backward (m) neighbours along the four directions.
    bp1, rp1 = up((b,r), 1, lp)
    bm1, rm1 = dw((b,r), 1, lp)
    bp2, rp2 = up((b,r), 2, lp)
    bm2, rm2 = dw((b,r), 2, lp)
    bp3, rp3 = up((b,r), 3, lp)
    bm3, rm3 = dw((b,r), 3, lp)
    bp4, rp4 = up((b,r), 4, lp)
    bm4, rm4 = dw((b,r), 4, lp)

    @inbounds begin

        # Site-diagonal part: (4+m0), the im*tm*Gamma{5} term and the Fcsw term.
        so[b,r] = (4+m0)*si[b,r]+ im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) +
                                                                            Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))

        # Nearest-neighbour part.
        so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bp1,rp1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bm1,1,rm1],si[bm1,rm1]) +
                        th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bp2,rp2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bm2,2,rm2],si[bm2,rm2]) +
                        th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bp3,rp3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bm3,3,rm3],si[bm3,rm3]) +
                        th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bp4,rp4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bm4,4,rm4],si[bm4,rm4]) )
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel (periodic boundaries), one thread per lattice site (b = thread index,
# r = block index).  Writes into so[b,r]:
#   (4+m0)*si + i*tm*Gamma{5}*si - 0.5*(hopping sum)
# with the hopping sum taken over the forward/backward neighbours in directions
# 1..4, weighted by th[mu] and conj(th[mu]) respectively.  No clover term.
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    # (buN, ruN): neighbour one step up in direction N; (bdN, rdN): one step down.
    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Site-diagonal part: mass and twisted mass.
        so[b,r] = (4+m0)*psi + im*tm*dmul(Gamma{5},psi)

        # Nearest-neighbour hopping part (single left-to-right sum, as in the original).
        hop = ( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
        so[b,r] -= 0.5*hop
    end

    return nothing
end
|
||||||
|
|
||||||
|
# Host-side launcher (periodic boundaries) for the gamma5-multiplied operator.
# Launches krnl_g5Dwimpr! (using the field stored in dws.csw) when |dpar.csw|
# exceeds 1.0E-10, and krnl_g5Dw! otherwise.  The launch is synchronised and
# timed under the label "g5Dw".  The result is written to `so`.
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    @timeit "g5Dw" begin
        if abs(dpar.csw) > 1.0E-10
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel (periodic boundaries), one thread per lattice site (b = thread index,
# r = block index).  First accumulates in so[b,r]
#   (4+m0)*si + 0.5*csw*i*(clover sum) - 0.5*(hopping sum)
# and then overwrites it with Gamma{5}*so[b,r] + i*tm*si[b,r].
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    # (buN, ruN): neighbour one step up in direction N; (bdN, rdN): one step down.
    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Site-diagonal part: mass and clover contributions (twisted mass is added last).
        clover = ( Fcsw[b,1,r]*dmul(Gamma{10},psi) + Fcsw[b,2,r]*dmul(Gamma{11},psi) +
                   Fcsw[b,3,r]*dmul(Gamma{12},psi) + Fcsw[b,4,r]*dmul(Gamma{15},psi) +
                   Fcsw[b,5,r]*dmul(Gamma{14},psi) + Fcsw[b,6,r]*dmul(Gamma{13},psi) )
        so[b,r] = (4+m0)*psi + 0.5*csw*im*clover

        # Nearest-neighbour hopping part (single left-to-right sum, as in the original).
        hop = ( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
        so[b,r] -= 0.5*hop

        # Multiply by Gamma{5} and add the twisted-mass term.
        g5out = dmul(Gamma{5}, so[b,r])
        so[b,r] = g5out + im*tm*si[b,r]
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel (periodic boundaries), one thread per lattice site (b = thread index,
# r = block index).  First accumulates in so[b,r]
#   (4+m0)*si - 0.5*(hopping sum)
# and then overwrites it with Gamma{5}*so[b,r] + i*tm*si[b,r].  No clover term.
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)

    # (buN, ruN): neighbour one step up in direction N; (bdN, rdN): one step down.
    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        # Site-diagonal mass term.
        so[b,r] = (4+m0)*si[b,r]

        # Nearest-neighbour hopping part (single left-to-right sum, as in the original).
        hop = ( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
        so[b,r] -= 0.5*hop

        # Multiply by Gamma{5} and add the twisted-mass term.
        g5out = dmul(Gamma{5}, so[b,r])
        so[b,r] = g5out + im*tm*si[b,r]
    end

    return nothing
end
|
||||||
|
|
||||||
|
# Host-side launcher (periodic boundaries).  Applies the gamma5-multiplied kernel
# twice: first si -> dws.st with twisted mass +dpar.tm, then dws.st -> so with
# twisted mass -dpar.tm.  The clover-improved kernel is used when |dpar.csw|
# exceeds 1.0E-10.  Each launch is synchronised and timed as "g5Dw" inside an
# enclosing "DwdagDw" timer.  dws.st is overwritten as scratch space.
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}

    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        # First application: si -> dws.st
        @timeit "g5Dw" begin
            if improved
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
            else
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
            end
        end

        # Second application with the sign of the twisted mass flipped: dws.st -> so
        @timeit "g5Dw" begin
            if improved
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
            else
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
            end
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
## SF
|
||||||
|
|
||||||
|
# Host-side launcher for SF boundary conditions (BC_SF_ORBI / BC_SF_AFWB).
# SF_bndfix! (defined elsewhere) is applied to `si` first, so `si` is modified;
# the kernels assume `si` vanishes on the boundary time slice.  Then launches
# krnl_Dwimpr! when |dpar.csw| exceeds 1.0E-10 and krnl_Dw! otherwise, passing
# dpar.ct as the extra boundary coefficient.  Synchronised, timed as "Dw".
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    SF_bndfix!(si,lp)

    @timeit "Dw" begin
        if abs(dpar.csw) > 1.0E-10
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel for SF boundary conditions, one thread per lattice site.
# Sites with point_time == 1 are left untouched.  For every other site writes
#   (4+m0)*si + i*tm*Gamma{5}*si + 0.5*csw*i*(clover sum) - 0.5*(hopping sum)
# into so[b,r], and adds (ct-1.0)*si[b,r] on the time slices point_time == 2 and
# point_time == lp.iL[4].
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    it = point_time(site, lp)

    # Boundary time slice: nothing is written.
    it == 1 && return nothing

    # (buN, ruN): neighbour one step up in direction N; (bdN, rdN): one step down.
    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Site-diagonal part: mass, twisted mass and clover contributions.
        clover = ( Fcsw[b,1,r]*dmul(Gamma{10},psi) + Fcsw[b,2,r]*dmul(Gamma{11},psi) +
                   Fcsw[b,3,r]*dmul(Gamma{12},psi) + Fcsw[b,4,r]*dmul(Gamma{15},psi) +
                   Fcsw[b,5,r]*dmul(Gamma{14},psi) + Fcsw[b,6,r]*dmul(Gamma{13},psi) )
        so[b,r] = (4+m0)*psi + im*tm*dmul(Gamma{5},psi) + 0.5*csw*im*clover

        # Nearest-neighbour hopping part (single left-to-right sum, as in the original).
        hop = ( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
        so[b,r] -= 0.5*hop

        # Extra diagonal term on the two time slices next to the boundaries.
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel for SF boundary conditions, one thread per lattice site.
# Sites with point_time == 1 are left untouched.  For every other site writes
#   (4+m0)*si + i*tm*Gamma{5}*si - 0.5*(hopping sum)
# into so[b,r], and adds (ct-1.0)*si[b,r] on the time slices point_time == 2 and
# point_time == lp.iL[4].  No clover term.
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    it = point_time(site, lp)

    # Boundary time slice: nothing is written.
    it == 1 && return nothing

    # (buN, ruN): neighbour one step up in direction N; (bdN, rdN): one step down.
    bu1, ru1 = up(site, 1, lp)
    bd1, rd1 = dw(site, 1, lp)
    bu2, ru2 = up(site, 2, lp)
    bd2, rd2 = dw(site, 2, lp)
    bu3, ru3 = up(site, 3, lp)
    bd3, rd3 = dw(site, 3, lp)
    bu4, ru4 = up(site, 4, lp)
    bd4, rd4 = dw(site, 4, lp)

    @inbounds begin
        psi = si[b,r]

        # Site-diagonal part: mass and twisted mass.
        so[b,r] = (4+m0)*psi + im*tm*dmul(Gamma{5},psi)

        # Nearest-neighbour hopping part (single left-to-right sum, as in the original).
        hop = ( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
        so[b,r] -= 0.5*hop

        # Extra diagonal term on the two time slices next to the boundaries.
        if (it == 2) || (it == lp.iL[4])
            so[b,r] += (ct-1.0)*si[b,r]
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
# Host-side launcher for SF boundary conditions (BC_SF_ORBI / BC_SF_AFWB) of the
# gamma5-multiplied operator.  SF_bndfix! (defined elsewhere) is applied to `si`
# first, so `si` is modified.  Then launches krnl_g5Dwimpr! when |dpar.csw|
# exceeds 1.0E-10 and krnl_g5Dw! otherwise, passing dpar.ct as the extra boundary
# coefficient.  Synchronised, timed as "g5Dw".
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    SF_bndfix!(si,lp)

    @timeit "g5Dw" begin
        if abs(dpar.csw) > 1.0E-10
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
        else
            CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel for SF boundary conditions, one thread per lattice site.
# For sites with point_time != 1 it accumulates in so[b,r]
#   (4+m0)*si + 0.5*csw*i*(clover sum) - 0.5*(hopping sum)
# plus (ct-1.0)*si[b,r] on the time slices point_time == 2 and point_time == lp.iL[4].
# Afterwards, for EVERY site, so[b,r] is replaced by Gamma{5}*so[b,r] + i*tm*si[b,r].
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    it = point_time(site, lp)

    if it != 1

        # (buN, ruN): neighbour one step up in direction N; (bdN, rdN): one step down.
        bu1, ru1 = up(site, 1, lp)
        bd1, rd1 = dw(site, 1, lp)
        bu2, ru2 = up(site, 2, lp)
        bd2, rd2 = dw(site, 2, lp)
        bu3, ru3 = up(site, 3, lp)
        bd3, rd3 = dw(site, 3, lp)
        bu4, ru4 = up(site, 4, lp)
        bd4, rd4 = dw(site, 4, lp)

        @inbounds begin
            psi = si[b,r]

            # Site-diagonal part: mass and clover contributions.
            clover = ( Fcsw[b,1,r]*dmul(Gamma{10},psi) + Fcsw[b,2,r]*dmul(Gamma{11},psi) +
                       Fcsw[b,3,r]*dmul(Gamma{12},psi) + Fcsw[b,4,r]*dmul(Gamma{15},psi) +
                       Fcsw[b,5,r]*dmul(Gamma{14},psi) + Fcsw[b,6,r]*dmul(Gamma{13},psi) )
            so[b,r] = (4+m0)*psi + 0.5*csw*im*clover

            # Nearest-neighbour hopping part (single left-to-right sum, as in the original).
            hop = ( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                    th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                    th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                    th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
            so[b,r] -= 0.5*hop

            # Extra diagonal term on the two time slices next to the boundaries.
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this statement runs for all sites, including point_time == 1,
    # where so[b,r] was not assigned above; there it acts on whatever so[b,r]
    # already held.  It is also outside the @inbounds block.  Confirm this is intended.
    g5out = dmul(Gamma{5}, so[b,r])
    so[b,r] = g5out + im*tm*si[b,r]

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel for SF boundary conditions, one thread per lattice site.
# For sites with point_time != 1 it accumulates in so[b,r]
#   (4+m0)*si - 0.5*(hopping sum)
# plus (ct-1.0)*si[b,r] on the time slices point_time == 2 and point_time == lp.iL[4].
# Afterwards, for EVERY site, so[b,r] is replaced by Gamma{5}*so[b,r] + i*tm*si[b,r].
# No clover term.
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    # The field si is assumed to be zero at t = 0

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    it = point_time(site, lp)

    if it != 1

        # (buN, ruN): neighbour one step up in direction N; (bdN, rdN): one step down.
        bu1, ru1 = up(site, 1, lp)
        bd1, rd1 = dw(site, 1, lp)
        bu2, ru2 = up(site, 2, lp)
        bd2, rd2 = dw(site, 2, lp)
        bu3, ru3 = up(site, 3, lp)
        bd3, rd3 = dw(site, 3, lp)
        bu4, ru4 = up(site, 4, lp)
        bd4, rd4 = dw(site, 4, lp)

        @inbounds begin
            # Site-diagonal mass term.
            so[b,r] = (4+m0)*si[b,r]

            # Nearest-neighbour hopping part (single left-to-right sum, as in the original).
            hop = ( th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) + conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                    th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) + conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                    th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) + conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                    th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) + conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
            so[b,r] -= 0.5*hop

            # Extra diagonal term on the two time slices next to the boundaries.
            if (it == 2) || (it == lp.iL[4])
                so[b,r] += (ct-1.0)*si[b,r]
            end
        end
    end

    # NOTE(review): this statement runs for all sites, including point_time == 1,
    # where so[b,r] was not assigned above; there it acts on whatever so[b,r]
    # already held.  It is also outside the @inbounds block.  Confirm this is intended.
    g5out = dmul(Gamma{5}, so[b,r])
    so[b,r] = g5out + im*tm*si[b,r]

    return nothing
end
|
||||||
|
|
||||||
|
# Host-side launcher for SF boundary conditions (BC_SF_ORBI / BC_SF_AFWB).
# Applies the gamma5-multiplied kernel twice: si -> dws.st with twisted mass
# +dpar.tm, then dws.st -> so with twisted mass -dpar.tm.  SF_bndfix! (defined
# elsewhere) is called on the output of each application.  Unlike Dw!/g5Dw! for
# the same boundary conditions, SF_bndfix! is NOT applied to `si` here.
# The clover-improved kernel is used when |dpar.csw| exceeds 1.0E-10.
# Each launch is synchronised and timed as "g5Dw" inside a "DwdagDw" timer.
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}

    improved = abs(dpar.csw) > 1.0E-10

    @timeit "DwdagDw" begin

        # First application: si -> dws.st
        @timeit "g5Dw" begin
            if improved
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
            end
        end
        SF_bndfix!(dws.st,lp)

        # Second application with the sign of the twisted mass flipped: dws.st -> so
        @timeit "g5Dw" begin
            if improved
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
            else
                CUDA.@sync CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
            end
        end
        SF_bndfix!(so,lp)
    end

    return nothing
end
|
694
src/YM/YMact.jl
694
src/YM/YMact.jl
|
@ -9,7 +9,322 @@
|
||||||
### created: Mon Jul 12 18:31:19 2021
|
### created: Mon Jul 12 18:31:19 2021
|
||||||
###
|
###
|
||||||
|
|
||||||
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,NB,N,M,B,D}
|
|
||||||
|
##
|
||||||
|
## OPEN
|
||||||
|
##
|
||||||
|
# GPU kernel for open boundary conditions (BC_OPEN), one thread per lattice site.
# For every plane (id1 > id2) it adds to a per-site sum S the plaquette trace
# (weight c0) and the traces built from the two staples h2 and h3 (weight c1),
# then stores S in plx[I].  Special cases on the last coordinate `it`:
#   * planes not involving direction N, on it == 1 or it == lp.iL[end]:
#     the whole contribution is multiplied by 0.5*cG;
#   * planes involving direction N on it == lp.iL[end]-1: the h2 term is left out;
#   * planes involving direction N on it == lp.iL[end]: no contribution.
# Elsewhere the weights are multiplied by ztw[ipl] where the flags TWP/TWH2/TWH3 hold.
# `Ubnd` is not read by this method.
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    I = point_coord(site, lp)
    it = I[N]

    S = zero(eltype(plx))
    ipl = 0
    @inbounds begin
        tlast = lp.iL[end]
        tedge = (it == tlast) || (it == 1)

        for id1 in N:-1:1
            bu1, ru1 = up(site, id1, lp)
            tplane = (id1 == N)

            for id2 in 1:id1-1
                bu2, ru2 = up(site, id2, lp)
                ipl += 1

                TWP  = (I[id1]==1) && (I[id2]==1)
                TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
                TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )

                # H2 staple: starts from the site one step up in id1.
                (b11, r11) = up((bu1,ru1), id1, lp)
                (b12, r12) = up((bu1,ru1), id2, lp)
                h2 = (U[bu1,id1,ru1]*U[b11,id2,r11])/U[b12,id1,r12]

                # H3 staple: starts from the site one step up in id2.
                (b22, r22) = up((bu2,ru2), id2, lp)
                (b21, r21) = up((bu2,ru2), id1, lp)
                h3 = (U[bu2,id2,ru2]*U[b22,id1,r22])/U[b21,id2,r21]
                # END staples

                ga  = U[bu1,id2,ru1]
                g2  = U[b,id2,r]\U[b,id1,r]
                u12 = U[bu2,id1,ru2]

                if tedge && !tplane
                    S += 0.5*cG*(c0*tr(g2*ga/u12) + c1*tr(g2*ga/h3) + c1*tr(g2*h2/u12))
                elseif tplane && (it == tlast-1)
                    S += c0*tr(g2*ga/u12) + c1*tr(g2*ga/h3)
                elseif !(tplane && (it == tlast))
                    if TWP
                        S += (ztw[ipl]*c0)*tr(g2*ga/u12)
                    else
                        S += c0*tr(g2*ga/u12)
                    end

                    if TWH2
                        S += (ztw[ipl]*c1)*tr(g2*h2/u12)
                    else
                        S += c1*tr(g2*h2/u12)
                    end

                    if TWH3
                        S += (ztw[ipl]*c1)*tr(g2*ga/h3)
                    else
                        S += c1*tr(g2*ga/h3)
                    end
                end
            end
        end

        plx[I] = S
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel for open boundary conditions (BC_OPEN), one thread per lattice site.
# Sums the plaquette trace over all planes (id1 > id2) and stores it in plx[I].
# Special cases on the last coordinate `it`:
#   * planes not involving direction N, on it == 1 or it == lp.iL[end]:
#     the trace is multiplied by 0.5*cG;
#   * planes involving direction N on it == lp.iL[end]: no contribution.
# Elsewhere the trace is multiplied by ztw[ipl] when both plane coordinates equal 1.
# `Ubnd` is not read by this method.
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        site = (b, r)
        I = point_coord(site, lp)
        it = I[N]
        tlast = lp.iL[end]
        tedge = (it == tlast) || (it == 1)

        S = zero(eltype(plx))
        ipl = 0
        for id1 in N:-1:1
            bu1, ru1 = up(site, id1, lp)
            tplane = (id1 == N)

            for id2 in 1:id1-1
                bu2, ru2 = up(site, id2, lp)
                ipl += 1

                # Planes containing direction N contribute nothing on the last slice.
                if tplane && (it == tlast)
                    continue
                end

                TWP = (I[id1]==1) && (I[id2]==1)
                gt1 = U[bu1,id2,ru1]
                plq = tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))

                if tedge && !tplane
                    S += 0.5*cG*plq
                elseif TWP
                    S += ztw[ipl]*plq
                else
                    S += plq
                end
            end
        end

        plx[I] = S
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel for open boundary conditions (BC_OPEN), one thread per lattice site.
# Adds the contribution of the plaquette in plane `ipl` (directions id1, id2 from
# lp.plidx[ipl]) to the force arrays: frc1 receives the terms for the two links
# starting at the site, frc2 the terms for the two links starting at the
# neighbours up in id1 and id2.  Special cases on the last coordinate `it`:
#   * plane not involving direction N, on it == 1 or it == lp.iL[end]:
#     every term is multiplied by 0.5*cG;
#   * plane involving direction N on it == lp.iL[end]: nothing is added.
# Elsewhere projalg is called with `ztw` as first argument when both plane
# coordinates equal 1.  `Ubnd` is not read by this method.
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    I = point_coord(site, lp)
    it = I[N]

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        tlast  = lp.iL[end]
        tplane = (id1 == N)

        # Plane containing direction N on the last slice: no contribution.
        if tplane && (it == tlast)
            return nothing
        end

        bu1, ru1 = up(site, id1, lp)
        bu2, ru2 = up(site, id2, lp)
        TWP = (I[id1]==1)&&(I[id2]==1)

        u1 = U[b,id1,r]
        u2 = U[b,id2,r]
        g1 = U[bu1,id2,ru1]/U[bu2,id1,ru2]
        g2 = u2\u1

        if !tplane && ( (it == 1) || (it == tlast) )
            half = 0.5*cG
            X = half*projalg(u1*g1/u2)

            frc1[b ,id1, r ] -= X
            frc1[b ,id2, r ] += X
            frc2[bu1,id2,ru1] -= half*projalg(g1*g2)
            frc2[bu2,id1,ru2] += half*projalg(g2*g1)
        else
            if TWP
                X = projalg(ztw,u1*g1/u2)
                frc2[bu1,id2,ru1] -= projalg(ztw,g1*g2)
                frc2[bu2,id1,ru2] += projalg(ztw,g2*g1)
            else
                X = projalg(u1*g1/u2)
                frc2[bu1,id2,ru1] -= projalg(g1*g2)
                frc2[bu2,id1,ru2] += projalg(g2*g1)
            end
            frc1[b ,id1, r ] -= X
            frc1[b ,id2, r ] += X
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
# GPU kernel for open boundary conditions (BC_OPEN), one thread per lattice site.
# Adds to frc1/frc2 the contribution of plane `ipl` (directions id1, id2 from
# lp.plidx[ipl]) built from the plaquette (weight c0) and from the four staples
# h1..h4 (weight c1).  Special cases on the last coordinate `it`:
#   * plane involving direction N on it == lp.iL[end]: nothing is added;
#   * plane involving direction N on it == 1: the h4 terms are left out;
#   * plane involving direction N on it == lp.iL[end]-1: the h2 terms are left out;
#   * plane not involving direction N, on it == 1 or it == lp.iL[end]:
#     every term is multiplied by 0.5*cG.
# Elsewhere each group of terms goes through projalg with a ztw-weighted first
# argument when the matching flag TWP/TWH1..TWH4 holds.
# `Ubnd` is not read by this method.
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    site = (b, r)
    I = point_coord(site, lp)
    it = I[N]

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        tlast  = lp.iL[end]
        tplane = (id1 == N)

        # Plane containing direction N on the last slice: no contribution.
        if tplane && (it == tlast)
            return nothing
        end

        bu1, ru1 = up(site, id1, lp)
        bu2, ru2 = up(site, id2, lp)

        TWP  = (I[id1]==1) && (I[id2]==1)
        TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
        TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
        TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
        TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )

        # H1 staple: starts from the site one step down in id2.
        (bd2, rd2) = dw(site, id2, lp)
        (bd2u1, rd2u1) = up((bd2,rd2), id1, lp)
        h1 = (U[bd2,id2,rd2]\U[bd2,id1,rd2])*U[bd2u1,id2,rd2u1]

        # H2 staple: starts from the site one step up in id1.
        (bu1u1, ru1u1) = up((bu1,ru1), id1, lp)
        (bu1u2, ru1u2) = up((bu1,ru1), id2, lp)
        h2 = (U[bu1,id1,ru1]*U[bu1u1,id2,ru1u1])/U[bu1u2,id1,ru1u2]

        # H3 staple: starts from the site one step up in id2.
        (bu2u2, ru2u2) = up((bu2,ru2), id2, lp)
        (bu2u1, ru2u1) = up((bu2,ru2), id1, lp)
        h3 = (U[bu2,id2,ru2]*U[bu2u2,id1,ru2u2])/U[bu2u1,id2,ru2u1]

        # H4 staple: starts from the site one step down in id1.
        (bd1, rd1) = dw(site, id1, lp)
        (bd1u2, rd1u2) = up((bd1,rd1), id2, lp)
        h4 = (U[bd1,id1,rd1]\U[bd1,id2,rd1])*U[bd1u2,id1,rd1u2]
        # END staples

        u1  = U[b,id1,r]
        u2  = U[b,id2,r]
        u12 = U[bu2,id1,ru2]
        ga  = U[bu1,id2,ru1]

        g1 = ga/u12
        g2 = u2\u1

        if tplane && (it == 1)
            # No h4 terms here.
            X = c0*projalg(u1*g1/u2) + c1*projalg(u1*h2/(u2*u12)) + c1*projalg(u1*ga/(u2*h3))

            frc1[b,id1,r] -= X
            frc1[b,id2,r] += X + c1*projalg(h1*g1/u2)
            frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
            frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
            frc2[bu1,id2,ru1] -= c1*projalg((g1/u2)*h1)
            frc2[bu2,id1,ru2] += c1*projalg((u2\h1)*g1)
            frc2[bu2,id1,ru2] += c1*projalg(g2*h2/u12)
            frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
        elseif tplane && (it == (tlast-1))
            # No h2 terms here.
            X = c0*projalg(u1*g1/u2) + c1*projalg(u1*ga/(u2*h3))

            frc1[b,id1,r] -= X + c1*projalg(u1*g1/h4)
            frc1[b,id2,r] += X + c1*projalg(h1*g1/u2)
            frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
            frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
            frc2[bu1,id2,ru1] -= c1*projalg((g1/u2)*h1)
            frc2[bu2,id1,ru2] += c1*projalg((u2\h1)*g1)
            frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
            frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*u1)
            frc2[bu2,id1,ru2] += c1*projalg(h4\u1*g1)
        elseif !tplane && ( (it == 1) || (it == tlast) )
            # All terms carry the common factor 0.5*cG.
            half = 0.5*cG
            hc0  = half*c0
            hc1  = half*c1
            X = half*(c0*projalg(u1*g1/u2) + c1*projalg(u1*h2/(u2*u12)) + c1*projalg(u1*ga/(u2*h3)) )

            frc1[b,id1,r] -= X + hc1*projalg(u1*g1/h4)
            frc1[b,id2,r] += X + hc1*projalg(h1*g1/u2)
            frc2[bu1,id2,ru1] -= hc0*projalg(g1*g2)
            frc2[bu2,id1,ru2] += hc0*projalg(g2*g1)
            frc2[bu1,id2,ru1] -= hc1*projalg((g1/u2)*h1)
            frc2[bu2,id1,ru2] += hc1*projalg((u2\h1)*g1)
            frc2[bu2,id1,ru2] += hc1*projalg(g2*h2/u12)
            frc2[bu1,id2,ru1] -= hc1*projalg((ga/h3)*g2)
            frc2[bu1,id2,ru1] -= hc1*projalg((g1/h4)*u1)
            frc2[bu2,id1,ru2] += hc1*projalg(h4\u1*g1)
        else
            # Plaquette terms.
            if TWP
                X = projalg(c0*ztw,u1*g1/u2)
                frc2[bu1,id2,ru1] -= projalg(c0*ztw,g1*g2)
                frc2[bu2,id1,ru2] += projalg(c0*ztw,g2*g1)
            else
                X = c0*projalg(u1*g1/u2)
                frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
                frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
            end

            # H1 terms.
            if TWH1
                frc1[b,id2,r] += projalg(ztw*c1,h1*g1/u2)
                frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/u2)*h1)
                frc2[bu2,id1,ru2] += projalg(ztw*c1,(u2\h1)*g1)
            else
                frc1[b,id2,r] += c1*projalg(h1*g1/u2)
                frc2[bu1,id2,ru1] -= c1*projalg((g1/u2)*h1)
                frc2[bu2,id1,ru2] += c1*projalg((u2\h1)*g1)
            end

            # H2 terms.
            if TWH2
                X += projalg(ztw*c1,u1*h2/(u2*u12))
                frc2[bu2,id1,ru2] += projalg(ztw*c1,g2*h2/u12)
            else
                X += c1*projalg(u1*h2/(u2*u12))
                frc2[bu2,id1,ru2] += c1*projalg(g2*h2/u12)
            end

            # H3 terms.
            if TWH3
                X += projalg(ztw*c1,u1*ga/(u2*h3))
                frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
            else
                X += c1*projalg(u1*ga/(u2*h3))
                frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
            end

            # H4 terms.
            if TWH4
                frc1[b,id1,r] -= projalg(ztw*c1,u1*g1/h4)
                frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*u1)
                frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\u1*g1)
            else
                frc1[b,id1,r] -= c1*projalg(u1*g1/h4)
                frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*u1)
                frc2[bu2,id1,ru2] += c1*projalg(h4\u1*g1)
            end

            frc1[b,id1,r] -= X
            frc1[b,id2,r] += X
        end
    end

    return nothing
end
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
## SF
|
||||||
|
##
|
||||||
|
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,NB,N,M,D}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -21,8 +336,8 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
for id1 in N:-1:1
|
for id1 in N:-1:1
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1==N)
|
SFBC = (id1==N)
|
||||||
|
|
||||||
for id2 = 1:id1-1
|
for id2 = 1:id1-1
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
ipl = ipl + 1
|
ipl = ipl + 1
|
||||||
|
@ -30,7 +345,7 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
|
||||||
TWP = (I[id1]==1) && (I[id2]==1)
|
TWP = (I[id1]==1) && (I[id2]==1)
|
||||||
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
||||||
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
|
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
|
||||||
|
|
||||||
# H2 staple
|
# H2 staple
|
||||||
(b1, r1) = up((b,r), id1, lp)
|
(b1, r1) = up((b,r), id1, lp)
|
||||||
(b2, r2) = up((b1,r1), id1, lp)
|
(b2, r2) = up((b1,r1), id1, lp)
|
||||||
|
@ -39,14 +354,14 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
|
||||||
else
|
else
|
||||||
gb = U[b2,id2,r2]
|
gb = U[b2,id2,r2]
|
||||||
end
|
end
|
||||||
|
|
||||||
(b2, r2) = up((b1,r1), id2, lp)
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
|
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
|
||||||
|
|
||||||
# H3 staple
|
# H3 staple
|
||||||
(b1, r1) = up((b,r), id2, lp)
|
(b1, r1) = up((b,r), id2, lp)
|
||||||
(b2, r2) = up((b1,r1), id2, lp)
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
|
|
||||||
(b3, r3) = up((b1,r1), id1, lp)
|
(b3, r3) = up((b1,r1), id1, lp)
|
||||||
if SFBC && (it == lp.iL[end])
|
if SFBC && (it == lp.iL[end])
|
||||||
gc = Ubnd[id2]
|
gc = Ubnd[id2]
|
||||||
|
@ -55,15 +370,15 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
|
||||||
end
|
end
|
||||||
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
|
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
|
||||||
# END staples
|
# END staples
|
||||||
|
|
||||||
if SFBC && (it == lp.iL[end])
|
if SFBC && (it == lp.iL[end])
|
||||||
ga = Ubnd[id2]
|
ga = Ubnd[id2]
|
||||||
else
|
else
|
||||||
ga = U[bu1,id2,ru1]
|
ga = U[bu1,id2,ru1]
|
||||||
end
|
end
|
||||||
|
|
||||||
g2 = U[b,id2,r]\U[b,id1,r]
|
g2 = U[b,id2,r]\U[b,id1,r]
|
||||||
|
|
||||||
if (it == lp.iL[end]) && SFBC
|
if (it == lp.iL[end]) && SFBC
|
||||||
S += cG*(c0*tr(g2*ga/U[bu2,id1,ru2]) + (3*c1/2)*tr(g2*ga/h3))
|
S += cG*(c0*tr(g2*ga/U[bu2,id1,ru2]) + (3*c1/2)*tr(g2*ga/h3))
|
||||||
elseif (it == 1) && SFBC
|
elseif (it == 1) && SFBC
|
||||||
|
@ -85,17 +400,17 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
|
||||||
S += c1*tr(g2*ga/h3)
|
S += c1*tr(g2*ga/h3)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
plx[I] = S
|
plx[I] = S
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
@ -103,21 +418,20 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
I = point_coord((b,r), lp)
|
I = point_coord((b,r), lp)
|
||||||
it = I[N]
|
it = I[N]
|
||||||
IBND = ( ( (B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) &&
|
IBND = ( (it == 1) || (it == lp.iL[end]))
|
||||||
( (it == 1) || (it == lp.iL[end])) )
|
|
||||||
|
|
||||||
S = zero(eltype(plx))
|
S = zero(eltype(plx))
|
||||||
ipl = 0
|
ipl = 0
|
||||||
for id1 in N:-1:1
|
for id1 in N:-1:1
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
SFBND = IBND && (id1 == N)
|
SFBND = IBND && (id1 == N)
|
||||||
|
|
||||||
for id2 = 1:id1-1
|
for id2 = 1:id1-1
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
ipl = ipl + 1
|
ipl = ipl + 1
|
||||||
TWP = (I[id1]==1) && (I[id2]==1)
|
TWP = (I[id1]==1) && (I[id2]==1)
|
||||||
|
|
||||||
if SFBND && (it == lp.iL[end])
|
if SFBND && (it == lp.iL[end])
|
||||||
gt1 = Ubnd[id2]
|
gt1 = Ubnd[id2]
|
||||||
else
|
else
|
||||||
gt1 = U[bu1,id2,ru1]
|
gt1 = U[bu1,id2,ru1]
|
||||||
|
@ -134,46 +448,46 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
plx[I] = S
|
plx[I] = S
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
I = point_coord((b,r), lp)
|
I = point_coord((b,r), lp)
|
||||||
it = I[N]
|
it = I[N]
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
id1, id2 = lp.plidx[ipl]
|
id1, id2 = lp.plidx[ipl]
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
TWP = (I[id1]==1)&&(I[id2]==1)
|
TWP = (I[id1]==1)&&(I[id2]==1)
|
||||||
|
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N)
|
SFBC = (id1 == N)
|
||||||
|
|
||||||
if SFBC && (it == lp.iL[end])
|
if SFBC && (it == lp.iL[end])
|
||||||
gt1 = Ubnd[id2]
|
gt1 = Ubnd[id2]
|
||||||
else
|
else
|
||||||
gt1 = U[bu1,id2,ru1]
|
gt1 = U[bu1,id2,ru1]
|
||||||
end
|
end
|
||||||
|
|
||||||
g1 = gt1/U[bu2,id1,ru2]
|
g1 = gt1/U[bu2,id1,ru2]
|
||||||
g2 = U[b,id2,r]\U[b,id1,r]
|
g2 = U[b,id2,r]\U[b,id1,r]
|
||||||
|
|
||||||
if SFBC && (it == 1)
|
if SFBC && (it == 1)
|
||||||
X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r])
|
X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r])
|
||||||
|
|
||||||
frc1[b ,id1, r ] -= X
|
frc1[b ,id1, r ] -= X
|
||||||
frc2[bu1,id2,ru1] -= cG*projalg(g1*g2)
|
frc2[bu1,id2,ru1] -= cG*projalg(g1*g2)
|
||||||
frc2[bu2,id1,ru2] += cG*projalg(g2*g1)
|
frc2[bu2,id1,ru2] += cG*projalg(g2*g1)
|
||||||
elseif SFBC && (it == lp.iL[end])
|
elseif SFBC && (it == lp.iL[end])
|
||||||
X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r])
|
X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r])
|
||||||
|
|
||||||
frc1[b ,id1, r ] -= X
|
frc1[b ,id1, r ] -= X
|
||||||
frc1[b ,id2, r ] += X
|
frc1[b ,id2, r ] += X
|
||||||
frc2[bu2,id1,ru2] += cG*projalg(g2*g1)
|
frc2[bu2,id1,ru2] += cG*projalg(g2*g1)
|
||||||
|
@ -191,29 +505,29 @@ function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw,
|
||||||
frc1[b ,id2, r ] += X
|
frc1[b ,id2, r ] += X
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
I = point_coord((b,r), lp)
|
I = point_coord((b,r), lp)
|
||||||
it = I[N]
|
it = I[N]
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
id1, id2 = lp.plidx[ipl]
|
id1, id2 = lp.plidx[ipl]
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
bu2, ru2 = up((b, r), id2, lp)
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
|
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N)
|
SFBC = (id1 == N)
|
||||||
TWP = (I[id1]==1) && (I[id2]==1)
|
TWP = (I[id1]==1) && (I[id2]==1)
|
||||||
TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
|
TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
|
||||||
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
||||||
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
|
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
|
||||||
TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )
|
TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )
|
||||||
|
|
||||||
# H1 staple
|
# H1 staple
|
||||||
(b1, r1) = dw((b,r), id2, lp)
|
(b1, r1) = dw((b,r), id2, lp)
|
||||||
(b2, r2) = up((b1,r1), id1, lp)
|
(b2, r2) = up((b1,r1), id1, lp)
|
||||||
|
@ -223,7 +537,7 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
||||||
gc = U[b2,id2,r2]
|
gc = U[b2,id2,r2]
|
||||||
end
|
end
|
||||||
h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc
|
h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc
|
||||||
|
|
||||||
# H2 staple
|
# H2 staple
|
||||||
(b1, r1) = up((b,r), id1, lp)
|
(b1, r1) = up((b,r), id1, lp)
|
||||||
(b2, r2) = up((b1,r1), id1, lp)
|
(b2, r2) = up((b1,r1), id1, lp)
|
||||||
|
@ -232,10 +546,10 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
||||||
else
|
else
|
||||||
gb = U[b2,id2,r2]
|
gb = U[b2,id2,r2]
|
||||||
end
|
end
|
||||||
|
|
||||||
(b2, r2) = up((b1,r1), id2, lp)
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
|
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
|
||||||
|
|
||||||
# H3 staple
|
# H3 staple
|
||||||
(b1, r1) = up((b,r), id2, lp)
|
(b1, r1) = up((b,r), id2, lp)
|
||||||
(b2, r2) = up((b1,r1), id2, lp)
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
|
@ -246,42 +560,42 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
||||||
gc = U[b3,id2,r3]
|
gc = U[b3,id2,r3]
|
||||||
end
|
end
|
||||||
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
|
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
|
||||||
|
|
||||||
# H4 staple
|
# H4 staple
|
||||||
(b1, r1) = dw((b,r), id1, lp)
|
(b1, r1) = dw((b,r), id1, lp)
|
||||||
(b2, r2) = up((b1,r1), id2, lp)
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2]
|
h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2]
|
||||||
# END staples
|
# END staples
|
||||||
|
|
||||||
if SFBC && (it == lp.iL[end])
|
if SFBC && (it == lp.iL[end])
|
||||||
ga = Ubnd[id2]
|
ga = Ubnd[id2]
|
||||||
else
|
else
|
||||||
ga = U[bu1,id2,ru1]
|
ga = U[bu1,id2,ru1]
|
||||||
end
|
end
|
||||||
|
|
||||||
g1 = ga/U[bu2,id1,ru2]
|
g1 = ga/U[bu2,id1,ru2]
|
||||||
g2 = U[b,id2,r]\U[b,id1,r]
|
g2 = U[b,id2,r]\U[b,id1,r]
|
||||||
|
|
||||||
if SFBC && (it == 1)
|
if SFBC && (it == 1)
|
||||||
X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) +
|
X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) +
|
||||||
(3*c1*cG/2)*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
|
(3*c1*cG/2)*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
|
||||||
|
|
||||||
frc1[b,id1,r] -= X
|
frc1[b,id1,r] -= X
|
||||||
|
|
||||||
frc2[bu1,id2,ru1] -= (cG*c0)*projalg(g1*g2) + (3*c1*cG/2)*projalg((ga/h3)*g2) +
|
frc2[bu1,id2,ru1] -= (cG*c0)*projalg(g1*g2) + (3*c1*cG/2)*projalg((ga/h3)*g2) +
|
||||||
(3*c1*cG/2)*projalg((g1/U[b,id2,r])*h1)
|
(3*c1*cG/2)*projalg((g1/U[b,id2,r])*h1)
|
||||||
|
|
||||||
frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) +
|
frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) +
|
||||||
c1*projalg(g2*h2/U[bu2,id1,ru2])
|
c1*projalg(g2*h2/U[bu2,id1,ru2])
|
||||||
elseif SFBC && (it == lp.iL[end])
|
elseif SFBC && (it == lp.iL[end])
|
||||||
X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) +
|
X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) +
|
||||||
(3*c1*cG/2) * (projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)))
|
(3*c1*cG/2) * (projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)))
|
||||||
|
|
||||||
frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4)
|
frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4)
|
||||||
frc1[b,id2,r] += X + (3*c1*cG/2)*projalg(h1*g1/U[b,id2,r])
|
frc1[b,id2,r] += X + (3*c1*cG/2)*projalg(h1*g1/U[b,id2,r])
|
||||||
|
|
||||||
frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) +
|
frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) +
|
||||||
c1 * projalg(h4\U[b,id1,r]*g1)
|
c1 * projalg(h4\U[b,id1,r]*g1)
|
||||||
else
|
else
|
||||||
if TWP
|
if TWP
|
||||||
X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r])
|
X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r])
|
||||||
|
@ -294,11 +608,11 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
||||||
end
|
end
|
||||||
if TWH1
|
if TWH1
|
||||||
frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r])
|
frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r])
|
||||||
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
|
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
|
||||||
frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1)
|
frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1)
|
||||||
else
|
else
|
||||||
frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r])
|
frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r])
|
||||||
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
|
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
|
||||||
frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
|
frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
|
||||||
end
|
end
|
||||||
if TWH2
|
if TWH2
|
||||||
|
@ -310,27 +624,274 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
|
||||||
end
|
end
|
||||||
if TWH3
|
if TWH3
|
||||||
X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3))
|
X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3))
|
||||||
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
|
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
|
||||||
else
|
else
|
||||||
X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
|
X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
|
||||||
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
|
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
|
||||||
end
|
end
|
||||||
if TWH4
|
if TWH4
|
||||||
frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
|
frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
|
||||||
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r])
|
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r])
|
||||||
frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
|
frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
|
||||||
else
|
else
|
||||||
frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
|
frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
|
||||||
frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
|
frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
|
||||||
frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
|
frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
|
||||||
|
end
|
||||||
|
frc1[b,id1,r] -= X
|
||||||
|
frc1[b,id2,r] += X
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
## PERIODIC
|
||||||
|
##
|
||||||
|
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,NB,N,M,D}
|
||||||
|
|
||||||
|
b = Int64(CUDA.threadIdx().x)
|
||||||
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
I = point_coord((b,r), lp)
|
||||||
|
it = I[N]
|
||||||
|
|
||||||
|
ipl = 0
|
||||||
|
S = zero(eltype(plx))
|
||||||
|
@inbounds begin
|
||||||
|
for id1 in N:-1:1
|
||||||
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
|
|
||||||
|
for id2 = 1:id1-1
|
||||||
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
|
ipl = ipl + 1
|
||||||
|
|
||||||
|
TWP = (I[id1]==1) && (I[id2]==1)
|
||||||
|
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
|
||||||
|
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
|
||||||
|
|
||||||
|
# H2 staple
|
||||||
|
(b1, r1) = up((b,r), id1, lp)
|
||||||
|
(b2, r2) = up((b1,r1), id1, lp)
|
||||||
|
gb = U[b2,id2,r2]
|
||||||
|
|
||||||
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
|
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
|
||||||
|
|
||||||
|
# H3 staple
|
||||||
|
(b1, r1) = up((b,r), id2, lp)
|
||||||
|
(b2, r2) = up((b1,r1), id2, lp)
|
||||||
|
|
||||||
|
(b3, r3) = up((b1,r1), id1, lp)
|
||||||
|
|
||||||
|
gc = U[b3,id2,r3]
|
||||||
|
|
||||||
|
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
|
||||||
|
# END staples
|
||||||
|
|
||||||
|
ga = U[bu1,id2,ru1]
|
||||||
|
|
||||||
|
g2 = U[b,id2,r]\U[b,id1,r]
|
||||||
|
|
||||||
|
if TWP
|
||||||
|
S += (ztw[ipl]*c0)*tr(g2*ga/U[bu2,id1,ru2])
|
||||||
|
else
|
||||||
|
S += c0*tr(g2*ga/U[bu2,id1,ru2])
|
||||||
|
end
|
||||||
|
if TWH2
|
||||||
|
S += (ztw[ipl]*c1)*tr(g2*h2/U[bu2,id1,ru2])
|
||||||
|
else
|
||||||
|
S += c1*tr(g2*h2/U[bu2,id1,ru2])
|
||||||
|
end
|
||||||
|
if TWH3
|
||||||
|
S += (ztw[ipl]*c1)*tr(g2*ga/h3)
|
||||||
|
else
|
||||||
|
S += c1*tr(g2*ga/h3)
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
frc1[b,id1,r] -= X
|
|
||||||
frc1[b,id2,r] += X
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
plx[I] = S
|
||||||
end
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
    krnl_plaq!(plx, U, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D})

CUDA kernel (periodic boundary conditions). For the lattice site addressed by
the current thread/block pair `(b, r)`, accumulate over every pair of
directions `id1 > id2` the trace

    tr(U[b,id1,r] * U[bu1,id2,ru1] / (U[b,id2,r] * U[bu2,id1,ru2]))

and store the sum in `plx[I]`, where `I = point_coord((b,r), lp)`.

When both `I[id1] == 1` and `I[id2] == 1` the term is multiplied by
`ztw[ipl]`, with `ipl` the running plane counter (presumably the twist phase
of that plane -- confirm against `ztwist`).

`Ubnd` and `cG` are not read by this method; they are kept so that the
signature matches the methods for the other boundary conditions.
"""
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}

    @inbounds begin
        b = Int64(CUDA.threadIdx().x)
        r = Int64(CUDA.blockIdx().x)
        I = point_coord((b,r), lp)

        S   = zero(eltype(plx))
        ipl = 0
        for id1 in N:-1:1
            bu1, ru1 = up((b, r), id1, lp)

            for id2 = 1:id1-1
                bu2, ru2 = up((b, r), id2, lp)
                ipl = ipl + 1
                TWP = (I[id1]==1) && (I[id2]==1)

                gt1 = U[bu1,id2,ru1]

                # The trace is the same in both cases; only the prefactor differs.
                plaq = tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))
                S += TWP ? ztw[ipl]*plaq : plaq
            end
        end

        plx[I] = S
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_force_wilson_pln!(frc1, frc2, U, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D})

CUDA kernel (periodic boundary conditions). For the site addressed by the
current thread/block pair `(b, r)` and the plane `lp.plidx[ipl] = (id1, id2)`,
update four link entries with `projalg` of products of the plane's links:

- `frc1[b,id1,r]` is decreased and `frc1[b,id2,r]` increased by the same `X`,
- `frc2[bu1,id2,ru1]` is decreased and `frc2[bu2,id1,ru2]` is increased,

where `(bu1,ru1)` / `(bu2,ru2)` are the neighbours returned by `up` in
directions `id1` / `id2`.

If `I[id1] == 1 && I[id2] == 1` the two-argument form `projalg(ztw, ...)` is
used; here `ztw` is passed straight to `projalg` and never indexed
(presumably the twist factor of this plane -- confirm with the caller).

`Ubnd` and `cG` are not read by this method.
"""
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    I = point_coord((b,r), lp)

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        bu1, ru1 = up((b, r), id1, lp)
        bu2, ru2 = up((b, r), id2, lp)
        TWP = (I[id1]==1)&&(I[id2]==1)

        gt1 = U[bu1,id2,ru1]

        g1 = gt1/U[bu2,id1,ru2]
        g2 = U[b,id2,r]\U[b,id1,r]

        if TWP
            X = projalg(ztw,U[b,id1,r]*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= projalg(ztw,g1*g2)
            frc2[bu2,id1,ru2] += projalg(ztw,g2*g1)
        else
            X = projalg(U[b,id1,r]*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= projalg(g1*g2)
            frc2[bu2,id1,ru2] += projalg(g2*g1)
        end

        # Same X enters the two links at the site itself with opposite sign.
        frc1[b ,id1, r ] -= X
        frc1[b ,id2, r ] += X
    end

    return nothing
end
|
||||||
|
|
||||||
|
"""
    krnl_force_impr_pln!(frc1, frc2, U, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D})

CUDA kernel (periodic boundary conditions). For the site addressed by the
current thread/block pair `(b, r)` and the plane `lp.plidx[ipl] = (id1, id2)`,
accumulate into `frc1` (links at the site) and `frc2` (links at the `up`
neighbours in `id1` and `id2`) the `projalg` of:

- the elementary loop of the plane, weighted by `c0`;
- four further products built from the staples `h1`..`h4`, weighted by `c1`.

Each contribution uses the two-argument form `projalg(ztw*weight, ...)` instead
of `weight*projalg(...)` when its flag (`TWP`, `TWH1`..`TWH4`) is set. The
flags depend only on the site coordinates `I[id1]`, `I[id2]` and on the
extents `lp.iL[id1]`, `lp.iL[id2]` (presumably marking loops that cross the
twisted corner of the plane -- confirm).

`Ubnd` and `cG` are not read by this method.
"""
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}

    b = Int64(CUDA.threadIdx().x)
    r = Int64(CUDA.blockIdx().x)
    I = point_coord((b,r), lp)

    @inbounds begin
        id1, id2 = lp.plidx[ipl]
        bu1, ru1 = up((b, r), id1, lp)
        bu2, ru2 = up((b, r), id2, lp)

        TWP  = (I[id1]==1) && (I[id2]==1)
        TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
        TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
        TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
        TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )

        # H1 staple: starts one step down in id2
        (b1, r1) = dw((b,r), id2, lp)
        (b2, r2) = up((b1,r1), id1, lp)

        gc = U[b2,id2,r2]

        h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc

        # H2 staple: starts one step up in id1
        (b1, r1) = up((b,r), id1, lp)
        (b2, r2) = up((b1,r1), id1, lp)

        gb = U[b2,id2,r2]

        (b2, r2) = up((b1,r1), id2, lp)
        h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]

        # H3 staple: starts one step up in id2
        (b1, r1) = up((b,r), id2, lp)
        (b2, r2) = up((b1,r1), id2, lp)
        (b3, r3) = up((b1,r1), id1, lp)

        gc = U[b3,id2,r3]
        h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc

        # H4 staple: starts one step down in id1
        (b1, r1) = dw((b,r), id1, lp)
        (b2, r2) = up((b1,r1), id2, lp)
        h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2]
        # END staples

        ga = U[bu1,id2,ru1]

        g1 = ga/U[bu2,id1,ru2]
        g2 = U[b,id2,r]\U[b,id1,r]

        # Elementary loop, weight c0. X is completed below and applied last.
        if TWP
            X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= projalg(c0*ztw,g1*g2)
            frc2[bu2,id1,ru2] += projalg(c0*ztw,g2*g1)
        else
            X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
            frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
        end

        # h1 contribution, weight c1
        if TWH1
            frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
            frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1)
        else
            frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r])
            frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
            frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
        end

        # h2 contribution, weight c1
        if TWH2
            X += projalg(ztw*c1,U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
            frc2[bu2,id1,ru2] += projalg(ztw*c1,g2*h2/U[bu2,id1,ru2])
        else
            X += c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
            frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2])
        end

        # h3 contribution, weight c1
        if TWH3
            X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3))
            frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
        else
            X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
            frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
        end

        # h4 contribution, weight c1
        if TWH4
            frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
            frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r])
            frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
        else
            frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
            frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
            frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
        end

        # X (elementary + h2 + h3 pieces) enters both site links, opposite sign.
        frc1[b,id1,r] -= X
        frc1[b,id2,r] += X

    end

    return nothing
end
|
||||||
|
|
||||||
|
@ -388,4 +949,3 @@ function force_pln!(frc1, ftmp, U, Ubnd, cG, ztw, lp::SpaceParm, c0=1)
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
|
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
|
||||||
"""
|
"""
|
||||||
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
|
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
|
||||||
|
|
||||||
if ymws.ALG == SU2alg
|
if ymws.ALG == SU2alg
|
||||||
@timeit "Randomize SU(2) algebra field" begin
|
@timeit "Randomize SU(2) algebra field" begin
|
||||||
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
|
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
|
||||||
|
@ -54,31 +54,44 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
"""
    krnl_assign_SU3!(frc, m, lp::SpaceParm{N,M,BC_OPEN,D})

CUDA kernel (open boundary conditions). For the site addressed by the current
thread/block pair, fill `frc[b,id,r]` for every direction `id in 1:lp.ndim`
with an `SU3alg` built from the eight entries `m[b,id,1,r]` .. `m[b,id,8,r]`.

No time slice is treated specially by this method: all directions on all
sites are assigned.
"""
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}

    ib = Int64(CUDA.threadIdx().x)
    ir = Int64(CUDA.blockIdx().x)

    @inbounds for mu in 1:lp.ndim
        frc[ib,mu,ir] = SU3alg(
            m[ib,mu,1,ir], m[ib,mu,2,ir], m[ib,mu,3,ir],
            m[ib,mu,4,ir], m[ib,mu,5,ir], m[ib,mu,6,ir],
            m[ib,mu,7,ir], m[ib,mu,8,ir],
        )
    end

    return nothing
end
|
||||||
|
|
||||||
|
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
it = point_time((b,r), lp)
|
it = point_time((b,r), lp)
|
||||||
|
|
||||||
if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI))
|
if it == 1
|
||||||
if it == 1
|
for id in 1:lp.ndim-1
|
||||||
for id in 1:lp.ndim-1
|
frc[b,id,r] = zero(T)
|
||||||
frc[b,id,r] = zero(T)
|
end
|
||||||
end
|
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
||||||
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
||||||
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
m[b,N,7,r], m[b,N,8,r])
|
||||||
m[b,N,7,r], m[b,N,8,r])
|
else
|
||||||
else
|
for id in 1:lp.ndim
|
||||||
for id in 1:lp.ndim
|
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||||
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||||
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
m[b,id,7,r], m[b,id,8,r])
|
||||||
m[b,id,7,r], m[b,id,8,r])
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -134,7 +134,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
it = point_time((b, r), lp)
|
it = point_time((b, r), lp)
|
||||||
|
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||||
|
OBC = (B == BC_OPEN)
|
||||||
|
|
||||||
@inbounds for id in 1:N
|
@inbounds for id in 1:N
|
||||||
bu, ru = up((b,r), id, lp)
|
bu, ru = up((b,r), id, lp)
|
||||||
|
@ -152,13 +153,21 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
||||||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
end
|
end
|
||||||
else
|
end
|
||||||
|
if OBC
|
||||||
|
if (it > 1) && (it < lp.iL[end])
|
||||||
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
|
elseif ((it == lp.iL[end]) || (it == 1)) && (id < N)
|
||||||
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
|
end
|
||||||
|
else
|
||||||
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
projalg(U[b,id,r]*X/U[b,id,r]))
|
projalg(U[b,id,r]*X/U[b,id,r]))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -264,7 +273,8 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
|
||||||
@timeit "E(t) plaquette measurement" begin
|
@timeit "E(t) plaquette measurement" begin
|
||||||
|
|
||||||
ztw = ztwist(gp, lp)
|
ztw = ztwist(gp, lp)
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
|
||||||
|
OBC = (B == BC_OPEN)
|
||||||
|
|
||||||
tp = ntuple(i->i, N-1)
|
tp = ntuple(i->i, N-1)
|
||||||
V3 = prod(lp.iL[1:end-1])
|
V3 = prod(lp.iL[1:end-1])
|
||||||
|
@ -285,6 +295,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
|
||||||
if !SFBC
|
if !SFBC
|
||||||
Eslc[1,ipl] = Etmp[1] + Etmp[end]
|
Eslc[1,ipl] = Etmp[1] + Etmp[end]
|
||||||
end
|
end
|
||||||
|
if OBC ## Check normalization of timelike boundary plaquettes
|
||||||
|
Eslc[end,ipl] = Etmp[end-1]
|
||||||
|
Eslc[1,ipl] = Etmp[1]
|
||||||
|
end
|
||||||
else
|
else
|
||||||
for it in 1:lp.iL[end]
|
for it in 1:lp.iL[end]
|
||||||
Eslc[it,ipl] = 2*Etmp[it]
|
Eslc[it,ipl] = 2*Etmp[it]
|
||||||
|
@ -327,7 +341,6 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{
|
||||||
plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
|
plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -350,21 +363,18 @@ function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace)
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
||||||
end
|
end
|
||||||
|
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp)
|
||||||
end
|
end
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp)
|
||||||
end
|
end
|
||||||
|
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp)
|
||||||
end
|
end
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
|
||||||
end
|
end
|
||||||
|
|
||||||
Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2)
|
Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -445,7 +455,7 @@ function krnl_add_et!(rm, frc1, lp::SpaceParm{4,M,B,D}) where {M,B,D}
|
||||||
I = point_coord((b,r), lp)
|
I = point_coord((b,r), lp)
|
||||||
rm[I] = dot(X1,X1)
|
rm[I] = dot(X1,X1)
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -474,6 +484,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
#First plane
|
#First plane
|
||||||
id1, id2 = lp.plidx[ipl1]
|
id1, id2 = lp.plidx[ipl1]
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||||
|
OBC = ((B == BC_OPEN) && (id1 == 4))
|
||||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||||
|
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
|
@ -493,6 +504,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
frc1[bu1,2,ru1] = zero(TA)
|
frc1[bu1,2,ru1] = zero(TA)
|
||||||
frc1[bd,3,rd] = zero(TA)
|
frc1[bd,3,rd] = zero(TA)
|
||||||
frc1[bu2,4,ru2] = projalg(l2*l1)
|
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||||
|
elseif OBC && (it == lp.iL[end])
|
||||||
|
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
frc1[bu1,2,ru1] = zero(TA)
|
||||||
|
frc1[bd,3,rd] = zero(TA)
|
||||||
|
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||||
else
|
else
|
||||||
if TWP
|
if TWP
|
||||||
frc1[b,1,r] = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r])
|
frc1[b,1,r] = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
@ -510,6 +526,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
# Second plane
|
# Second plane
|
||||||
id1, id2 = lp.plidx[ipl2]
|
id1, id2 = lp.plidx[ipl2]
|
||||||
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||||
|
OBC = ((B == BC_OPEN) && (id1 == 4))
|
||||||
TWP = ((I[id1]==1)&&(I[id2]==1))
|
TWP = ((I[id1]==1)&&(I[id2]==1))
|
||||||
|
|
||||||
bu1, ru1 = up((b, r), id1, lp)
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
|
@ -529,6 +546,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
frc2[bu1,2,ru1] = zero(TA)
|
frc2[bu1,2,ru1] = zero(TA)
|
||||||
frc2[bd,3,rd] = zero(TA)
|
frc2[bd,3,rd] = zero(TA)
|
||||||
frc2[bu2,4,ru2] = projalg(l2*l1)
|
frc2[bu2,4,ru2] = projalg(l2*l1)
|
||||||
|
elseif OBC && (it == lp.iL[end])
|
||||||
|
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
frc1[bu1,2,ru1] = zero(TA)
|
||||||
|
frc1[bd,3,rd] = zero(TA)
|
||||||
|
frc1[bu2,4,ru2] = projalg(l2*l1)
|
||||||
else
|
else
|
||||||
if TWP
|
if TWP
|
||||||
frc2[b,1,r] = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r])
|
frc2[b,1,r] = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r])
|
||||||
|
@ -543,7 +565,5 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue