OBC Branch Merge

This commit is contained in:
Fernando P.Panadero 2024-06-20 16:32:40 +02:00
parent d026a17b44
commit bc06079664
7 changed files with 1726 additions and 808 deletions

View file

@ -105,500 +105,6 @@ struct DiracWorkspace{T}
end
export DiracWorkspace, DiracParam
"""
function Csw!(dws, U, gp, lp::SpaceParm)
Computes the clover and stores it in dws.csw.
"""
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin
for i in 1:Int(lp.npls)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
end
end
end
return nothing
end
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
bd1, rd1 = dw((b, r), id1, lp)
bd2, rd2 = dw((b, r), id2, lp)
bdd, rdd = dw((bd1, rd1), id2, lp)
bud, rud = dw((bu1, ru1), id2, lp)
bdu, rdu = up((bd1, rd1), id2, lp)
if SFBC && (it == lp.iL[end])
gt1 = Ubnd[id2]
gt2 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
gt2 = U[bud,id2,rud]
end
M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]
if !(SFBC && (it == 1))
csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
end
end
return nothing
end
"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes the Dirac operator (with the Wilson term) `\`\``D_w``\`\` with gauge field U and parameters `dpar` of the field `si` and stores it in `so`.
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
end
end
end
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]+ im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes \`\` \\gamma_5 \`\` times the Dirac operator (with the Wilson term) with gauge field U and parameters `dpar` of the field `si` and stores it in `so`.
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
end
return nothing
end
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
end
return nothing
end
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
return nothing
end
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
return nothing
end
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Applies the operator \`\` \\gamma_5 D_w \`\` twice to `si` and stores the result in `so`. This is equivalent to appling the operator \`\` D_w^\\dagger D_w \`\`
The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
SF_bndfix!(so,lp)
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
end
end
SF_bndfix!(so,lp)
end
end
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
end
end
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
end
end
end
end
return nothing
end
"""
function mtwmdpar(dpar::DiracParam)
@ -610,108 +116,19 @@ function mtwmdpar(dpar::DiracParam{P,R}) where {P,R}
end
"""
SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
export DiracWorkspace, DiracParam, mtwmdpar
Sets all the values of `sp` in the first time slice to zero.
"""
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
@timeit "SF boundary fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
end
end
return nothing
end
function krnl_sfbndfix!(sp,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) == 1)
sp[b,r] = 0.0*sp[b,r]
end
return nothing
end
"""
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
"""
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
end
end
return nothing
end
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
if t == 0
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2],
x[b,3,r,1] + im* x[b,3,r,2]),p))
elseif point_time((b,r),lp) == t
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2],
x[b,3,r,1] + im* x[b,3,r,2]),p))
end
end
return nothing
end
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}},lp::SpaceParm, t::Int64=0) where {T}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 2, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
end
end
return nothing
end
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
if t == 0
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2]),p))
elseif point_time((b,r),lp) == t
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2]),p))
end
end
return nothing
end
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
include("Diracfields.jl")
export SF_bndfix!, Csw!, pfrandomize!
include("Diracoper.jl")
export Dw!, g5Dw!, DwdagDw!
include("DiracIO.jl")
export read_prop, save_prop, read_dpar
include("Diracflow.jl")
export Dslash_sq!, flw, backflow
export Nablanabla!, Dslash_sq!, flw, backflow
end

211
src/Dirac/Diracfields.jl Normal file
View file

@ -0,0 +1,211 @@
"""
function Csw!(dws, U, gp, lp::SpaceParm)
Computes the clover and stores it in dws.csw.
"""
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin
for i in 1:Int(lp.npls)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
end
end
end
return nothing
end
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
OBC = (B == BC_OPEN) && ((it == 1) || (it == lp.iL[end]))
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
bd1, rd1 = dw((b, r), id1, lp)
bd2, rd2 = dw((b, r), id2, lp)
bdd, rdd = dw((bd1, rd1), id2, lp)
bud, rud = dw((bu1, ru1), id2, lp)
bdu, rdu = up((bd1, rd1), id2, lp)
if SFBC && (it == lp.iL[end])
gt1 = Ubnd[id2]
gt2 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
gt2 = U[bud,id2,rud]
end
M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]
if !(SFBC && (it == 1)) && !OBC
csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
end
end
return nothing
end
"""
SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
Sets all the values of `sp` in the first time slice to zero.
"""
function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
@timeit "SF boundary fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
end
end
return nothing
end
function krnl_sfbndfix!(sp,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) == 1)
sp[b,r] = 0.0*sp[b,r]
end
return nothing
end
"""
SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D})
Sets all the values of `sp` in the first and last time slice to zero.
"""
function SF_bndfix!(sp, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
@timeit "SF boundary fix" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_opbndfix!(sp, lp)
end
end
return nothing
end
function krnl_opbndfix!(sp,lp::SpaceParm)
b=Int64(CUDA.threadIdx().x)
r=Int64(CUDA.blockIdx().x)
if ((point_time((b,r),lp) == 1) || (point_time((b,r),lp) == lp.iL[end]))
sp[b,r] = 0.0*sp[b,r]
end
return nothing
end
"""
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0)
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
"""
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4)./sqrt(2) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
end
end
return nothing
end
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4)./sqrt(2) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
end
end
SF_bndfix!(f,lp)
return nothing
end
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
if t == 0
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2],
x[b,3,r,1] + im* x[b,3,r,2]),p))
elseif point_time((b,r),lp) == t
f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2],
x[b,3,r,1] + im* x[b,3,r,2]),p))
end
end
return nothing
end
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4)./sqrt(2) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
end
end
return nothing
end
function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4)./sqrt(2) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t)
end
end
SF_bndfix!(f,lp)
return nothing
end
function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
if t == 0
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2]),p))
elseif point_time((b,r),lp) == t
f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2],
x[b,2,r,1] + im* x[b,2,r,2]),p))
end
end
return nothing
end

View file

@ -30,7 +30,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps)
end
end
end
end
@ -86,7 +86,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam,
@timeit "CPU to GPU" copyto!(U,U0)
for j in dsave:-1:1
@timeit "CPU to GPU" copyto!(U,U0)
@timeit "CPU to GPU" copyto!(U,U0)
for k in 1:j-1
flw(U, int, 1, eps_all[k], gp, lp, ymws)
end
@ -154,83 +154,6 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
return nothing
end
"""
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
"""
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Laplacian" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
end
end
return nothing
end
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end
return nothing
end
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
if (point_time((b,r),lp) != 1)
so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end
end
return nothing
end
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
@ -278,13 +201,123 @@ end
flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw_adapt(U, psi, int, tend, int.eps_ini, gp, dpar, lp, ymws, dws)
"""
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
"""
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
@timeit "Laplacian" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
end
end
return nothing
end
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
SF_bndfix!(si,lp)
@timeit "Laplacian" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
end
end
SF_bndfix!(so,lp)
return nothing
end
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end
end
return nothing
end
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end
return nothing
end
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@inbounds begin
if (point_time((b,r),lp) != 1)
so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end
end
return nothing
end
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
"""
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
@ -292,40 +325,40 @@ Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
"""
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "DwdagDw" begin
@timeit "DwdagDw" begin
@timeit "g5Dslsh" begin
@timeit "g5Dslsh" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
end
end
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
end
end
end
@timeit "g5Dslsh" begin
@timeit "g5Dslsh" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
end
end
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
end
if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
end
end
end
end
return nothing
end
@ -349,12 +382,12 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r])
so[b,r] = dmul(Gamma{5}, so[b,r])
end
end
return nothing
@ -369,19 +402,19 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
so[b,r] = 4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r])
end
@ -393,11 +426,11 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
@inbounds begin
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end
return nothing
@ -409,15 +442,15 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORB
@inbounds begin
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
if (point_time((b,r),lp) != 1)
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end
return nothing
return nothing
end
end

664
src/Dirac/Diracoper.jl Normal file
View file

@ -0,0 +1,664 @@
## OPEN
"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes the Dirac operator (with the Wilson term) `\`\``D_w``\`\` with gauge field U and parameters `dpar` of the field `si` and stores it in `so`.
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
"""
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
SF_bndfix!(si,lp)
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
end
SF_bndfix!(so,lp)
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == (lp.iL[4]-1))
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == (lp.iL[4]-1))
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes \`\` \\gamma_5 \`\` times the Dirac operator (with the Wilson term) with gauge field U and parameters `dpar` of the field `si` and stores it in `so`.
If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
"""
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
SF_bndfix!(si,lp)
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
end
SF_bndfix!(so,lp)
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == (lp.iL[4]-1))
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
return nothing
end
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == (lp.iL[4]-1))
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
return nothing
end
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Applies the operator \`\` \\gamma_5 D_w \`\` twice to `si` and stores the result in `so`. This is equivalent to appling the operator \`\` D_w^\\dagger D_w \`\`
The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
"""
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
SF_bndfix!(si,lp)
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
SF_bndfix!(so,lp)
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
end
end
SF_bndfix!(so,lp)
end
end
return nothing
end
## PERDIODIC
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
end
end
end
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]+ im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
end
return nothing
end
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
end
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
end
end
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
end
end
end end
return nothing
end
## SF
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
SF_bndfix!(si,lp)
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
SF_bndfix!(si,lp)
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
return nothing
end
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r]
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end
end
SF_bndfix!(so,lp)
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end
end
SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
end
end
SF_bndfix!(so,lp)
end
end
return nothing
end

View file

@ -9,7 +9,322 @@
### created: Mon Jul 12 18:31:19 2021
###
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,NB,N,M,B,D}
##
## OPEN
##
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
ipl = 0
S = zero(eltype(plx))
@inbounds begin
for id1 in N:-1:1
bu1, ru1 = up((b, r), id1, lp)
TOBC = (id1==N)
for id2 = 1:id1-1
bu2, ru2 = up((b, r), id2, lp)
ipl = ipl + 1
TWP = (I[id1]==1) && (I[id2]==1)
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
# H2 staple
(b1, r1) = up((b,r), id1, lp)
(b2, r2) = up((b1,r1), id1, lp)
gb = U[b2,id2,r2]
(b2, r2) = up((b1,r1), id2, lp)
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
# H3 staple
(b1, r1) = up((b,r), id2, lp)
(b2, r2) = up((b1,r1), id2, lp)
(b3, r3) = up((b1,r1), id1, lp)
gc = U[b3,id2,r3]
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
# END staples
ga = U[bu1,id2,ru1]
g2 = U[b,id2,r]\U[b,id1,r]
if ( (it == lp.iL[end]) || (it == 1) ) && !TOBC
S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2]) + c1*tr(g2*ga/h3) + c1*tr(g2*h2/U[bu2,id1,ru2]))
elseif (it == lp.iL[end]-1) && TOBC
S += c0*tr(g2*ga/U[bu2,id1,ru2]) + c1*tr(g2*ga/h3)
elseif (it == lp.iL[end]) && TOBC
nothing
else
if TWP
S += (ztw[ipl]*c0)*tr(g2*ga/U[bu2,id1,ru2])
else
S += c0*tr(g2*ga/U[bu2,id1,ru2])
end
if TWH2
S += (ztw[ipl]*c1)*tr(g2*h2/U[bu2,id1,ru2])
else
S += c1*tr(g2*h2/U[bu2,id1,ru2])
end
if TWH3
S += (ztw[ipl]*c1)*tr(g2*ga/h3)
else
S += c1*tr(g2*ga/h3)
end
end
end
end
plx[I] = S
end
return nothing
end
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
S = zero(eltype(plx))
ipl = 0
for id1 in N:-1:1
bu1, ru1 = up((b, r), id1, lp)
TOBC = (id1==N)
for id2 = 1:id1-1
bu2, ru2 = up((b, r), id2, lp)
ipl = ipl + 1
TWP = (I[id1]==1) && (I[id2]==1)
gt1 = U[bu1,id2,ru1]
if ( (it == lp.iL[end]) || (it == 1)) && !TOBC
S += 0.5*cG*(tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])))
elseif (it == lp.iL[end]) && TOBC
nothing
else
if TWP
S += ztw[ipl]*tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))
else
S += tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))
end
end
end
end
plx[I] = S
end
return nothing
end
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
TWP = (I[id1]==1)&&(I[id2]==1)
TOBC = (id1 == N)
gt1 = U[bu1,id2,ru1]
g1 = gt1/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
if !TOBC && ( (it == 1) || (it == lp.iL[end]) )
X = 0.5*cG*projalg(U[b,id1,r]*g1/U[b,id2,r])
frc1[b ,id1, r ] -= X
frc1[b ,id2, r ] += X
frc2[bu1,id2,ru1] -= 0.5*cG*projalg(g1*g2)
frc2[bu2,id1,ru2] += 0.5*cG*projalg(g2*g1)
elseif TOBC && (it == lp.iL[end])
nothing
else
if TWP
X = projalg(ztw,U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(ztw,g1*g2)
frc2[bu2,id1,ru2] += projalg(ztw,g2*g1)
else
X = projalg(U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(g1*g2)
frc2[bu2,id1,ru2] += projalg(g2*g1)
end
frc1[b ,id1, r ] -= X
frc1[b ,id2, r ] += X
end
end
return nothing
end
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
TOBC = (id1 == N)
TWP = (I[id1]==1) && (I[id2]==1)
TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )
# H1 staple
(b1, r1) = dw((b,r), id2, lp)
(b2, r2) = up((b1,r1), id1, lp)
gc = U[b2,id2,r2]
h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc
# H2 staple
(b1, r1) = up((b,r), id1, lp)
(b2, r2) = up((b1,r1), id1, lp)
gb = U[b2,id2,r2]
(b2, r2) = up((b1,r1), id2, lp)
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
# H3 staple
(b1, r1) = up((b,r), id2, lp)
(b2, r2) = up((b1,r1), id2, lp)
(b3, r3) = up((b1,r1), id1, lp)
gc = U[b3,id2,r3]
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
# H4 staple
(b1, r1) = dw((b,r), id1, lp)
(b2, r2) = up((b1,r1), id2, lp)
h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2]
# END staples
ga = U[bu1,id2,ru1]
g1 = ga/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
if !TOBC && ( (it == 1) || (it == lp.iL[end]) )
X = 0.5*cG*(c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) )
frc1[b,id1,r] -= X + 0.5*cG*c1*projalg(U[b,id1,r]*g1/h4)
frc1[b,id2,r] += X + 0.5*cG*c1*projalg(h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= 0.5*cG*c0*projalg(g1*g2)
frc2[bu2,id1,ru2] += 0.5*cG*c0*projalg(g2*g1)
frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg((U[b,id2,r]\h1)*g1)
frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg(g2*h2/U[bu2,id1,ru2])
frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((ga/h3)*g2)
frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg(h4\U[b,id1,r]*g1)
elseif TOBC && (it == lp.iL[end])
nothing
elseif TOBC && (it == 1)
X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc1[b,id1,r] -= X
frc1[b,id2,r] += X + c1*projalg(h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2])
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
elseif TOBC && (it == (lp.iL[end]-1) )
X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4)
frc1[b,id2,r] += X + c1*projalg(h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
else
if TWP
X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(c0*ztw,g1*g2)
frc2[bu2,id1,ru2] += projalg(c0*ztw,g2*g1)
else
X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
end
if TWH1
frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1)
else
frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
end
if TWH2
X += projalg(ztw*c1,U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
frc2[bu2,id1,ru2] += projalg(ztw*c1,g2*h2/U[bu2,id1,ru2])
else
X += c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2])
end
if TWH3
X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
else
X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
end
if TWH4
frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
else
frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
end
frc1[b,id1,r] -= X
frc1[b,id2,r] += X
end
end
return nothing
end
##
## SF
##
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,NB,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
@ -21,8 +336,8 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
@inbounds begin
for id1 in N:-1:1
bu1, ru1 = up((b, r), id1, lp)
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1==N)
SFBC = (id1==N)
for id2 = 1:id1-1
bu2, ru2 = up((b, r), id2, lp)
ipl = ipl + 1
@ -30,7 +345,7 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
TWP = (I[id1]==1) && (I[id2]==1)
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
# H2 staple
(b1, r1) = up((b,r), id1, lp)
(b2, r2) = up((b1,r1), id1, lp)
@ -39,14 +354,14 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
else
gb = U[b2,id2,r2]
end
(b2, r2) = up((b1,r1), id2, lp)
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
# H3 staple
(b1, r1) = up((b,r), id2, lp)
(b2, r2) = up((b1,r1), id2, lp)
(b3, r3) = up((b1,r1), id1, lp)
if SFBC && (it == lp.iL[end])
gc = Ubnd[id2]
@ -55,15 +370,15 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
end
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
# END staples
if SFBC && (it == lp.iL[end])
ga = Ubnd[id2]
else
ga = U[bu1,id2,ru1]
end
g2 = U[b,id2,r]\U[b,id1,r]
if (it == lp.iL[end]) && SFBC
S += cG*(c0*tr(g2*ga/U[bu2,id1,ru2]) + (3*c1/2)*tr(g2*ga/h3))
elseif (it == 1) && SFBC
@ -85,17 +400,17 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt
S += c1*tr(g2*ga/h3)
end
end
end
end
plx[I] = S
end
return nothing
end
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
@inbounds begin
@ -103,21 +418,20 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
IBND = ( ( (B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) &&
( (it == 1) || (it == lp.iL[end])) )
IBND = ( (it == 1) || (it == lp.iL[end]))
S = zero(eltype(plx))
ipl = 0
for id1 in N:-1:1
bu1, ru1 = up((b, r), id1, lp)
SFBND = IBND && (id1 == N)
SFBND = IBND && (id1 == N)
for id2 = 1:id1-1
bu2, ru2 = up((b, r), id2, lp)
ipl = ipl + 1
TWP = (I[id1]==1) && (I[id2]==1)
if SFBND && (it == lp.iL[end])
if SFBND && (it == lp.iL[end])
gt1 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
@ -134,46 +448,46 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
end
end
end
plx[I] = S
end
return nothing
end
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
TWP = (I[id1]==1)&&(I[id2]==1)
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N)
SFBC = (id1 == N)
if SFBC && (it == lp.iL[end])
gt1 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
end
g1 = gt1/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
if SFBC && (it == 1)
X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r])
frc1[b ,id1, r ] -= X
frc2[bu1,id2,ru1] -= cG*projalg(g1*g2)
frc2[bu2,id1,ru2] += cG*projalg(g2*g1)
elseif SFBC && (it == lp.iL[end])
X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r])
frc1[b ,id1, r ] -= X
frc1[b ,id2, r ] += X
frc2[bu2,id1,ru2] += cG*projalg(g2*g1)
@ -191,29 +505,29 @@ function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw,
frc1[b ,id2, r ] += X
end
end
return nothing
end
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N)
SFBC = (id1 == N)
TWP = (I[id1]==1) && (I[id2]==1)
TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )
# H1 staple
(b1, r1) = dw((b,r), id2, lp)
(b2, r2) = up((b1,r1), id1, lp)
@ -223,7 +537,7 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
gc = U[b2,id2,r2]
end
h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc
# H2 staple
(b1, r1) = up((b,r), id1, lp)
(b2, r2) = up((b1,r1), id1, lp)
@ -232,10 +546,10 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
else
gb = U[b2,id2,r2]
end
(b2, r2) = up((b1,r1), id2, lp)
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
# H3 staple
(b1, r1) = up((b,r), id2, lp)
(b2, r2) = up((b1,r1), id2, lp)
@ -246,42 +560,42 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
gc = U[b3,id2,r3]
end
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
# H4 staple
(b1, r1) = dw((b,r), id1, lp)
(b2, r2) = up((b1,r1), id2, lp)
h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2]
# END staples
if SFBC && (it == lp.iL[end])
ga = Ubnd[id2]
else
ga = U[bu1,id2,ru1]
end
g1 = ga/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
if SFBC && (it == 1)
X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) +
(3*c1*cG/2)*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
(3*c1*cG/2)*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc1[b,id1,r] -= X
frc2[bu1,id2,ru1] -= (cG*c0)*projalg(g1*g2) + (3*c1*cG/2)*projalg((ga/h3)*g2) +
(3*c1*cG/2)*projalg((g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) +
c1*projalg(g2*h2/U[bu2,id1,ru2])
c1*projalg(g2*h2/U[bu2,id1,ru2])
elseif SFBC && (it == lp.iL[end])
X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) +
(3*c1*cG/2) * (projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)))
frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4)
frc1[b,id2,r] += X + (3*c1*cG/2)*projalg(h1*g1/U[b,id2,r])
(3*c1*cG/2) * (projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)))
frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4)
frc1[b,id2,r] += X + (3*c1*cG/2)*projalg(h1*g1/U[b,id2,r])
frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) +
c1 * projalg(h4\U[b,id1,r]*g1)
c1 * projalg(h4\U[b,id1,r]*g1)
else
if TWP
X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r])
@ -294,11 +608,11 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
end
if TWH1
frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1)
else
frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
end
if TWH2
@ -310,27 +624,274 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG,
end
if TWH3
X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
else
X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
end
if TWH4
frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
else
frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
end
frc1[b,id1,r] -= X
frc1[b,id2,r] += X
end
end
return nothing
end
##
## PERIODIC
##
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,NB,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
ipl = 0
S = zero(eltype(plx))
@inbounds begin
for id1 in N:-1:1
bu1, ru1 = up((b, r), id1, lp)
for id2 = 1:id1-1
bu2, ru2 = up((b, r), id2, lp)
ipl = ipl + 1
TWP = (I[id1]==1) && (I[id2]==1)
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
# H2 staple
(b1, r1) = up((b,r), id1, lp)
(b2, r2) = up((b1,r1), id1, lp)
gb = U[b2,id2,r2]
(b2, r2) = up((b1,r1), id2, lp)
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
# H3 staple
(b1, r1) = up((b,r), id2, lp)
(b2, r2) = up((b1,r1), id2, lp)
(b3, r3) = up((b1,r1), id1, lp)
gc = U[b3,id2,r3]
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
# END staples
ga = U[bu1,id2,ru1]
g2 = U[b,id2,r]\U[b,id1,r]
if TWP
S += (ztw[ipl]*c0)*tr(g2*ga/U[bu2,id1,ru2])
else
S += c0*tr(g2*ga/U[bu2,id1,ru2])
end
if TWH2
S += (ztw[ipl]*c1)*tr(g2*h2/U[bu2,id1,ru2])
else
S += c1*tr(g2*h2/U[bu2,id1,ru2])
end
if TWH3
S += (ztw[ipl]*c1)*tr(g2*ga/h3)
else
S += c1*tr(g2*ga/h3)
end
end
frc1[b,id1,r] -= X
frc1[b,id2,r] += X
end
plx[I] = S
end
return nothing
end
function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
S = zero(eltype(plx))
ipl = 0
for id1 in N:-1:1
bu1, ru1 = up((b, r), id1, lp)
for id2 = 1:id1-1
bu2, ru2 = up((b, r), id2, lp)
ipl = ipl + 1
TWP = (I[id1]==1) && (I[id2]==1)
gt1 = U[bu1,id2,ru1]
if TWP
S += ztw[ipl]*tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))
else
S += tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))
end
end
end
plx[I] = S
end
return nothing
end
function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
TWP = (I[id1]==1)&&(I[id2]==1)
gt1 = U[bu1,id2,ru1]
g1 = gt1/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
if TWP
X = projalg(ztw,U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(ztw,g1*g2)
frc2[bu2,id1,ru2] += projalg(ztw,g2*g1)
else
X = projalg(U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(g1*g2)
frc2[bu2,id1,ru2] += projalg(g2*g1)
end
frc1[b ,id1, r ] -= X
frc1[b ,id2, r ] += X
end
return nothing
end
function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D}
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[N]
@inbounds begin
id1, id2 = lp.plidx[ipl]
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
TWP = (I[id1]==1) && (I[id2]==1)
TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) )
TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) )
TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) )
TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) )
# H1 staple
(b1, r1) = dw((b,r), id2, lp)
(b2, r2) = up((b1,r1), id1, lp)
gc = U[b2,id2,r2]
h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc
# H2 staple
(b1, r1) = up((b,r), id1, lp)
(b2, r2) = up((b1,r1), id1, lp)
gb = U[b2,id2,r2]
(b2, r2) = up((b1,r1), id2, lp)
h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2]
# H3 staple
(b1, r1) = up((b,r), id2, lp)
(b2, r2) = up((b1,r1), id2, lp)
(b3, r3) = up((b1,r1), id1, lp)
gc = U[b3,id2,r3]
h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc
# H4 staple
(b1, r1) = dw((b,r), id1, lp)
(b2, r2) = up((b1,r1), id2, lp)
h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2]
# END staples
ga = U[bu1,id2,ru1]
g1 = ga/U[bu2,id1,ru2]
g2 = U[b,id2,r]\U[b,id1,r]
if TWP
X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(c0*ztw,g1*g2)
frc2[bu2,id1,ru2] += projalg(c0*ztw,g2*g1)
else
X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= c0*projalg(g1*g2)
frc2[bu2,id1,ru2] += c0*projalg(g2*g1)
end
if TWH1
frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1)
else
frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r])
frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1)
frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1)
end
if TWH2
X += projalg(ztw*c1,U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
frc2[bu2,id1,ru2] += projalg(ztw*c1,g2*h2/U[bu2,id1,ru2])
else
X += c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2]))
frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2])
end
if TWH3
X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2)
else
X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))
frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2)
end
if TWH4
frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4)
frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1)
else
frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4)
frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r])
frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1)
end
frc1[b,id1,r] -= X
frc1[b,id2,r] += X
end
return nothing
end
@ -388,4 +949,3 @@ function force_pln!(frc1, ftmp, U, Ubnd, cG, ztw, lp::SpaceParm, c0=1)
return nothing
end

View file

@ -15,7 +15,7 @@
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
"""
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
if ymws.ALG == SU2alg
@timeit "Randomize SU(2) algebra field" begin
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
@ -54,31 +54,44 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
return nothing
end
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
for id in 1:lp.ndim
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
m[b,id,7,r], m[b,id,8,r])
end
end
return nothing
end
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
it = point_time((b,r), lp)
if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI))
if it == 1
for id in 1:lp.ndim-1
frc[b,id,r] = zero(T)
end
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
m[b,N,7,r], m[b,N,8,r])
else
for id in 1:lp.ndim
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
m[b,id,7,r], m[b,id,8,r])
end
if it == 1
for id in 1:lp.ndim-1
frc[b,id,r] = zero(T)
end
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
m[b,N,7,r], m[b,N,8,r])
else
for id in 1:lp.ndim
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
m[b,id,7,r], m[b,id,8,r])
end
end
end
return nothing
end

View file

@ -134,7 +134,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
r = Int64(CUDA.blockIdx().x)
it = point_time((b, r), lp)
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
OBC = (B == BC_OPEN)
@inbounds for id in 1:N
bu, ru = up((b,r), id, lp)
@ -152,13 +153,21 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
end
else
end
if OBC
if (it > 1) && (it < lp.iL[end])
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
elseif ((it == lp.iL[end]) || (it == 1)) && (id < N)
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
end
else
frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) +
projalg(U[b,id,r]*X/U[b,id,r]))
end
end
end
return nothing
end
@ -264,7 +273,8 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
@timeit "E(t) plaquette measurement" begin
ztw = ztwist(gp, lp)
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) )
OBC = (B == BC_OPEN)
tp = ntuple(i->i, N-1)
V3 = prod(lp.iL[1:end-1])
@ -285,6 +295,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws:
if !SFBC
Eslc[1,ipl] = Etmp[1] + Etmp[end]
end
if OBC ## Check normalization of timelike boundary plaquettes
Eslc[end,ipl] = Etmp[end-1]
Eslc[1,ipl] = Etmp[1]
end
else
for it in 1:lp.iL[end]
Eslc[it,ipl] = 2*Etmp[it]
@ -327,7 +341,6 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{
plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2]))
end
end
return nothing
end
@ -350,21 +363,18 @@ function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp)
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp)
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp)
end
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp)
end
Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2)
end
@ -445,7 +455,7 @@ function krnl_add_et!(rm, frc1, lp::SpaceParm{4,M,B,D}) where {M,B,D}
I = point_coord((b,r), lp)
rm[I] = dot(X1,X1)
end
return nothing
end
@ -474,6 +484,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
#First plane
id1, id2 = lp.plidx[ipl1]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
OBC = ((B == BC_OPEN) && (id1 == 4))
TWP = ((I[id1]==1)&&(I[id2]==1))
bu1, ru1 = up((b, r), id1, lp)
@ -493,6 +504,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
frc1[bu1,2,ru1] = zero(TA)
frc1[bd,3,rd] = zero(TA)
frc1[bu2,4,ru2] = projalg(l2*l1)
elseif OBC && (it == lp.iL[end])
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
frc1[bu1,2,ru1] = zero(TA)
frc1[bd,3,rd] = zero(TA)
frc1[bu2,4,ru2] = projalg(l2*l1)
else
if TWP
frc1[b,1,r] = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r])
@ -510,6 +526,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
# Second plane
id1, id2 = lp.plidx[ipl2]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
OBC = ((B == BC_OPEN) && (id1 == 4))
TWP = ((I[id1]==1)&&(I[id2]==1))
bu1, ru1 = up((b, r), id1, lp)
@ -529,6 +546,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
frc2[bu1,2,ru1] = zero(TA)
frc2[bd,3,rd] = zero(TA)
frc2[bu2,4,ru2] = projalg(l2*l1)
elseif OBC && (it == lp.iL[end])
frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r])
frc1[bu1,2,ru1] = zero(TA)
frc1[bd,3,rd] = zero(TA)
frc1[bu2,4,ru2] = projalg(l2*l1)
else
if TWP
frc2[b,1,r] = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r])
@ -543,7 +565,5 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T},
end
end
end
return nothing
end