mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
Csw improvement for fermions
This commit is contained in:
parent
32c4c84864
commit
b83a191051
5 changed files with 341 additions and 29 deletions
|
@ -50,6 +50,7 @@ struct DiracWorkspace{T}
|
||||||
sp
|
sp
|
||||||
sAp
|
sAp
|
||||||
st
|
st
|
||||||
|
csw
|
||||||
|
|
||||||
function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
|
function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
|
||||||
|
|
||||||
|
@ -57,18 +58,121 @@ struct DiracWorkspace{T}
|
||||||
sp = scalar_field(Spinor{4,G}, lp)
|
sp = scalar_field(Spinor{4,G}, lp)
|
||||||
sAp = scalar_field(Spinor{4,G}, lp)
|
sAp = scalar_field(Spinor{4,G}, lp)
|
||||||
st = scalar_field(Spinor{4,G}, lp)
|
st = scalar_field(Spinor{4,G}, lp)
|
||||||
return new{T}(sr,sp,sAp,st)
|
|
||||||
|
csw = tensor_field(U3alg{T},lp)
|
||||||
|
|
||||||
|
return new{T}(sr,sp,sAp,st,csw,cs)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
export DiracWorkspace, DiracParam
|
export DiracWorkspace, DiracParam
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Csw!(dws, U, gp, lp::SpaceParm)
|
||||||
|
|
||||||
|
Computes the clover and stores it in dws.csw.
|
||||||
|
|
||||||
|
"""
|
||||||
|
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
|
@timeit "Csw computation" begin
|
||||||
|
|
||||||
|
for i in 1:Int(lp.npls)
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
|
||||||
|
|
||||||
|
@inbounds begin
|
||||||
|
b = Int64(CUDA.threadIdx().x)
|
||||||
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
I = point_coord((b,r), lp)
|
||||||
|
it = I[4]
|
||||||
|
|
||||||
|
id1, id2 = lp.plidx[ipl]
|
||||||
|
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
|
||||||
|
|
||||||
|
bu1, ru1 = up((b, r), id1, lp)
|
||||||
|
bu2, ru2 = up((b, r), id2, lp)
|
||||||
|
bd1, rd1 = dw((b, r), id1, lp)
|
||||||
|
bd2, rd2 = dw((b, r), id2, lp)
|
||||||
|
bdd, rdd = dw((bd1, rd1), id2, lp)
|
||||||
|
bud, rud = dw((bu1, ru1), id2, lp)
|
||||||
|
bdu, rdu = up((bd1, rd1), id2, lp)
|
||||||
|
|
||||||
|
if SFBC && (it == lp.iL[end])
|
||||||
|
gt1 = Ubnd[id2]
|
||||||
|
gt2 = Ubnd[id2]
|
||||||
|
else
|
||||||
|
gt1 = U[bu1,id2,ru1]
|
||||||
|
gt2 = U[bud,id2,rud]
|
||||||
|
end
|
||||||
|
|
||||||
|
M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
|
||||||
|
M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
|
||||||
|
M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
|
||||||
|
M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]
|
||||||
|
|
||||||
|
|
||||||
|
if !(SFBC && (it == 1))
|
||||||
|
csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
|
if abs(dpar.csw) > 1.0E-10
|
||||||
|
@timeit "Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
@timeit "Dw" begin
|
@timeit "Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
|
@inbounds begin
|
||||||
|
|
||||||
|
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
|
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||||
|
|
||||||
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
@ -102,11 +206,56 @@ end
|
||||||
|
|
||||||
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||||
|
|
||||||
|
if abs(dpar.csw) > 1.0E-10
|
||||||
|
@timeit "Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
@timeit "Dw" begin
|
@timeit "Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||||
|
|
||||||
|
# The field si is assumed to be zero at t = 0
|
||||||
|
|
||||||
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
|
@inbounds begin
|
||||||
|
|
||||||
|
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
|
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||||
|
|
||||||
|
|
||||||
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
|
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
|
||||||
|
so[b,r] += (ct-1.0)*si[b,r]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
@ -147,11 +296,48 @@ end
|
||||||
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
|
if abs(dpar.csw) > 1.0E-10
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
|
@inbounds begin
|
||||||
|
|
||||||
|
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
|
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||||
|
|
||||||
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
@ -186,11 +372,58 @@ end
|
||||||
|
|
||||||
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||||
|
|
||||||
|
if abs(dpar.csw) > 1.0E-10
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||||
|
|
||||||
|
# The field si is assumed to be zero at t = 0
|
||||||
|
|
||||||
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
|
@inbounds begin
|
||||||
|
|
||||||
|
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
|
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
|
||||||
|
|
||||||
|
|
||||||
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
|
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
|
||||||
|
so[b,r] += (ct-1.0)*si[b,r]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
@ -231,28 +464,24 @@ function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D}
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
|
||||||
|
|
||||||
@timeit "DwdagDw" begin
|
|
||||||
|
|
||||||
@timeit "g5Dw" begin
|
|
||||||
CUDA.@sync begin
|
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
@timeit "g5Dw" begin
|
|
||||||
CUDA.@sync begin
|
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return nothing
|
|
||||||
end
|
|
||||||
|
|
||||||
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||||
|
|
||||||
|
if abs(dpar.csw) > 1.0E-10
|
||||||
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
@timeit "DwdagDw" begin
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
|
@ -267,6 +496,45 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
|
if abs(dpar.csw) > 1.0E-10
|
||||||
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
else
|
||||||
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
@timeit "g5Dw" begin
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -291,6 +559,6 @@ end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!
|
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -43,7 +43,15 @@ Returns a scalar field of elemental type `T`, with lexicografic memory layout.
|
||||||
"""
|
"""
|
||||||
scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...)
|
scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...)
|
||||||
|
|
||||||
export vector_field, scalar_field, nscalar_field, scalar_field_point
|
"""
|
||||||
|
tensor_field(::Type{T}, lp::SpaceParm)
|
||||||
|
|
||||||
|
Returns a tensor field of elemental type `T`.
|
||||||
|
"""
|
||||||
|
tensor_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.npls, lp.rsz)
|
||||||
|
|
||||||
|
|
||||||
|
export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
33
src/Groups/AlgebraU3.jl
Normal file
33
src/Groups/AlgebraU3.jl
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
struct U3alg{T} <: Algebra
|
||||||
|
u11::T
|
||||||
|
u22::T
|
||||||
|
u33::T
|
||||||
|
u12::Complex{T}
|
||||||
|
u13::Complex{T}
|
||||||
|
u23::Complex{T}
|
||||||
|
end
|
||||||
|
|
||||||
|
function antsym(a::SU3{T}) where T <: AbstractFloat
|
||||||
|
t1 = 2.0*imag(a.u11)
|
||||||
|
t2 = 2.0*imag(a.u22)
|
||||||
|
t3 = 2.0*imag(a.u12*a.u21 - a.u11*a.u22)
|
||||||
|
t4 = a.u12 - conj(a.u21)
|
||||||
|
t5 = a.u13 - (a.u12*a.u23 - a.u13*a.u22)
|
||||||
|
t6 = a.u23 - (a.u13*a.u21 - a.u11*a.u23)
|
||||||
|
|
||||||
|
return U3alg{T}(t1,t2,t3,t4,t5,t6)
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
Base.:*(a::U3alg{T},b::SU3fund{T}) where T <: AbstractFloat = SU3fund{T}(im*a.u11*b.t1 + a.u12*b.t2 + a.u13*b.t3,
|
||||||
|
-conj(a.u12)*b.t1 + im*a.u22*b.t2 + a.u23*b.t3,
|
||||||
|
-conj(a.u13)*b.t1 - conj(a.u23)*b.t2 + im*a.u33*b.t3)
|
||||||
|
|
||||||
|
Base.:+(a::U3alg{T},b::U3alg{T}) where T <: AbstractFloat = U3alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u33 + b.u33, a.u12 + b.u12, a.u13 + b.u13, a.u23 + b.u23)
|
||||||
|
|
||||||
|
|
||||||
|
Base.:*(r::Number, a::U3alg{T}) where T <: AbstractFloat = U3alg{T}(r*a.u11, r*a.u22, r*a.u33, r*a.u12, r*a.u13, r*a.u23)
|
||||||
|
|
|
@ -74,11 +74,14 @@ include("FundamentalSU3.jl")
|
||||||
export imm, mimm
|
export imm, mimm
|
||||||
## END SU(3)
|
## END SU(3)
|
||||||
|
|
||||||
|
include("AlgebraU3.jl")
|
||||||
|
export U3alg
|
||||||
|
|
||||||
include("GroupU1.jl")
|
include("GroupU1.jl")
|
||||||
export U1, U1alg
|
export U1, U1alg
|
||||||
|
|
||||||
|
|
||||||
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one
|
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
|
||||||
|
|
||||||
|
|
||||||
end # module
|
end # module
|
||||||
|
|
|
@ -16,8 +16,8 @@ include("Groups/Groups.jl")
|
||||||
|
|
||||||
using .Groups
|
using .Groups
|
||||||
export Group, Algebra, GMatrix
|
export Group, Algebra, GMatrix
|
||||||
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund
|
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg
|
||||||
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one
|
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
|
||||||
|
|
||||||
include("Space/Space.jl")
|
include("Space/Space.jl")
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ export BC_PERIODIC, BC_OPEN, BC_SF_AFWB, BC_SF_ORBI
|
||||||
|
|
||||||
include("Fields/Fields.jl")
|
include("Fields/Fields.jl")
|
||||||
using .Fields
|
using .Fields
|
||||||
export vector_field, scalar_field, nscalar_field, scalar_field_point
|
export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field
|
||||||
|
|
||||||
include("MD/MD.jl")
|
include("MD/MD.jl")
|
||||||
using .MD
|
using .MD
|
||||||
|
@ -57,7 +57,7 @@ export pmul, gpmul, gdagpmul, dmul
|
||||||
include("Dirac/Dirac.jl")
|
include("Dirac/Dirac.jl")
|
||||||
using .Dirac
|
using .Dirac
|
||||||
export DiracWorkspace, DiracParam
|
export DiracWorkspace, DiracParam
|
||||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!
|
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!
|
||||||
|
|
||||||
|
|
||||||
include("Solvers/Solvers.jl")
|
include("Solvers/Solvers.jl")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue