Csw improvement for fermions

This commit is contained in:
Fernando P. Panadero 2023-11-20 17:02:18 +01:00
parent 32c4c84864
commit b83a191051
5 changed files with 341 additions and 29 deletions

View file

@ -50,6 +50,7 @@ struct DiracWorkspace{T}
sp
sAp
st
csw
function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
@ -57,22 +58,125 @@ struct DiracWorkspace{T}
sp = scalar_field(Spinor{4,G}, lp)
sAp = scalar_field(Spinor{4,G}, lp)
st = scalar_field(Spinor{4,G}, lp)
return new{T}(sr,sp,sAp,st)
csw = tensor_field(U3alg{T},lp)
return new{T}(sr,sp,sAp,st,csw,cs)
end
end
export DiracWorkspace, DiracParam
"""
function Csw!(dws, U, gp, lp::SpaceParm)
Computes the clover and stores it in dws.csw.
"""
function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "Csw computation" begin
for i in 1:Int(lp.npls)
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp)
end
end
end
return nothing
end
function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
I = point_coord((b,r), lp)
it = I[4]
id1, id2 = lp.plidx[ipl]
SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4)
bu1, ru1 = up((b, r), id1, lp)
bu2, ru2 = up((b, r), id2, lp)
bd1, rd1 = dw((b, r), id1, lp)
bd2, rd2 = dw((b, r), id2, lp)
bdd, rdd = dw((bd1, rd1), id2, lp)
bud, rud = dw((bu1, ru1), id2, lp)
bdu, rdu = up((bd1, rd1), id2, lp)
if SFBC && (it == lp.iL[end])
gt1 = Ubnd[id2]
gt2 = Ubnd[id2]
else
gt1 = U[bu1,id2,ru1]
gt2 = U[bud,id2,rud]
end
M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2])
M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r]
M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2])
M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1]
if !(SFBC && (it == 1))
csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4))
end
end
return nothing
end
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
end
end
end
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
end
return nothing
end
function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@ -102,15 +206,60 @@ end
function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
return nothing
end
function krnl_Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
@ -147,11 +296,48 @@ end
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r])
end
return nothing
end
@ -186,15 +372,62 @@ end
function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
else
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
end
end
end
return nothing
end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp)
@inbounds begin
so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4])
so[b,r] += (ct-1.0)*si[b,r]
end
end
end
so[b,r] = dmul(Gamma{5}, so[b,r])
return nothing
end
function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
# The field si is assumed to be zero at t = 0
@ -231,28 +464,24 @@ function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D}
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp)
end
end
end
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
end
end
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
@ -267,6 +496,45 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
end
end
end
end
return nothing
end
function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
if abs(dpar.csw) > 1.0E-10
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
end
end
end
else
@timeit "DwdagDw" begin
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp)
end
end
@timeit "g5Dw" begin
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp)
end
end
end
end
return nothing
end
@ -291,6 +559,6 @@ end
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!
end

View file

@ -43,7 +43,15 @@ Returns a scalar field of elemental type `T`, with lexicografic memory layout.
"""
scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...)
export vector_field, scalar_field, nscalar_field, scalar_field_point
"""
tensor_field(::Type{T}, lp::SpaceParm)
Returns a tensor field of elemental type `T`.
"""
tensor_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.npls, lp.rsz)
export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field
end

33
src/Groups/AlgebraU3.jl Normal file
View file

@ -0,0 +1,33 @@
struct U3alg{T} <: Algebra
u11::T
u22::T
u33::T
u12::Complex{T}
u13::Complex{T}
u23::Complex{T}
end
function antsym(a::SU3{T}) where T <: AbstractFloat
t1 = 2.0*imag(a.u11)
t2 = 2.0*imag(a.u22)
t3 = 2.0*imag(a.u12*a.u21 - a.u11*a.u22)
t4 = a.u12 - conj(a.u21)
t5 = a.u13 - (a.u12*a.u23 - a.u13*a.u22)
t6 = a.u23 - (a.u13*a.u21 - a.u11*a.u23)
return U3alg{T}(t1,t2,t3,t4,t5,t6)
end
Base.:*(a::U3alg{T},b::SU3fund{T}) where T <: AbstractFloat = SU3fund{T}(im*a.u11*b.t1 + a.u12*b.t2 + a.u13*b.t3,
-conj(a.u12)*b.t1 + im*a.u22*b.t2 + a.u23*b.t3,
-conj(a.u13)*b.t1 - conj(a.u23)*b.t2 + im*a.u33*b.t3)
Base.:+(a::U3alg{T},b::U3alg{T}) where T <: AbstractFloat = U3alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u33 + b.u33, a.u12 + b.u12, a.u13 + b.u13, a.u23 + b.u23)
Base.:*(r::Number, a::U3alg{T}) where T <: AbstractFloat = U3alg{T}(r*a.u11, r*a.u22, r*a.u33, r*a.u12, r*a.u13, r*a.u23)

View file

@ -74,11 +74,14 @@ include("FundamentalSU3.jl")
export imm, mimm
## END SU(3)
include("AlgebraU3.jl")
export U3alg
include("GroupU1.jl")
export U1, U1alg
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
end # module

View file

@ -16,8 +16,8 @@ include("Groups/Groups.jl")
using .Groups
export Group, Algebra, GMatrix
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one
export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg
export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
include("Space/Space.jl")
@ -28,7 +28,7 @@ export BC_PERIODIC, BC_OPEN, BC_SF_AFWB, BC_SF_ORBI
include("Fields/Fields.jl")
using .Fields
export vector_field, scalar_field, nscalar_field, scalar_field_point
export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field
include("MD/MD.jl")
using .MD
@ -57,7 +57,7 @@ export pmul, gpmul, gdagpmul, dmul
include("Dirac/Dirac.jl")
using .Dirac
export DiracWorkspace, DiracParam
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!
include("Solvers/Solvers.jl")