diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl index 5ee8a6a..53e22d3 100644 --- a/src/Dirac/Dirac.jl +++ b/src/Dirac/Dirac.jl @@ -19,11 +19,38 @@ using ..Fields using ..YM using ..Spinors + +struct DiracParam{T,R} + m0::T + csw::T + th::NTuple{4,Complex{T}} + ct::T + + + function DiracParam{T}(::Type{R},m0,csw,th,ct) where {T,R} + return new{T,R}(m0,csw,th,ct) + end + + function DiracParam{T}(m0,csw,th,ct) where {T} + return new{T,SU3fund}(m0,csw,th,ct) + end +end +function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R} + + println(io, "Wilson fermions in the: ", R, " representation") + println(io, " - Bare mass: ", dpar.m0," // Kappa = ",0.5/(dpar.m0+4)) + println(io, " - Csw : ", dpar.csw) + println(io, " - c_t: ", dpar.ct) + println(io, " - Theta: ", dpar.th) + return nothing +end + struct DiracWorkspace{T} sr sp sAp st + csw function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D} @@ -31,53 +58,285 @@ struct DiracWorkspace{T} sp = scalar_field(Spinor{4,G}, lp) sAp = scalar_field(Spinor{4,G}, lp) st = scalar_field(Spinor{4,G}, lp) - return new{T}(sr,sp,sAp,st) + + csw = tensor_field(U3alg{T},lp) + + return new{T}(sr,sp,sAp,st,csw,cs) end end -export DiracWorkspace -function Dw!(so, U, si, m0, lp::SpaceParm) +export DiracWorkspace, DiracParam - @timeit "Dw" begin - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, m0, [1,1,1,1], lp) + +""" + function Csw!(dws, U, gp, lp::SpaceParm) + +Computes the clover and stores it in dws.csw. + +""" +function Csw!(dws, U, gp, lp::SpaceParm{4,6,B,D}) where {B,D} + + @timeit "Csw computation" begin + + for i in 1:Int(lp.npls) + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_csw!(dws.csw, U, gp.Ubnd, i, lp) + end + end + end + + return nothing +end + +function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D}) where {T,M,B,D} + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[4] + + id1, id2 = lp.plidx[ipl] + SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4) + + bu1, ru1 = up((b, r), id1, lp) + bu2, ru2 = up((b, r), id2, lp) + bd1, rd1 = dw((b, r), id1, lp) + bd2, rd2 = dw((b, r), id2, lp) + bdd, rdd = dw((bd1, rd1), id2, lp) + bud, rud = dw((bu1, ru1), id2, lp) + bdu, rdu = up((bd1, rd1), id2, lp) + + if SFBC && (it == lp.iL[end]) + gt1 = Ubnd[id2] + gt2 = Ubnd[id2] + else + gt1 = U[bu1,id2,ru1] + gt2 = U[bud,id2,rud] + end + + M1 = U[b,id1,r]*gt1/(U[b,id2,r]*U[bu2,id1,ru2]) + M2 = (U[bd2,id2,rd2]\(U[bd2,id1,rd2]*gt2))/U[b,id1,r] + M3 = (U[bdd,id2,rdd]*U[bd1,id1,rd1])\(U[bdd,id1,rdd]*U[bd2,id2,rd2]) + M4 = (U[b,id2,r]/(U[bd1,id2,rd1]*U[bdu,id1,rdu]))*U[bd1,id1,rd1] + + + if !(SFBC && (it == 1)) + csw[b,ipl,r] = 0.125*(antsym(M1)+antsym(M2)+antsym(M3)+antsym(M4)) + end + + end + + return nothing +end + +function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} + + if abs(dpar.csw) > 1.0E-10 + @timeit "Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp) + end + end + else + @timeit "Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp) + end end end return nothing end -function DwdagDw!(so, U, si, m0, st, lp::SpaceParm) +function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D} + + b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) + + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + + @inbounds begin + + so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) + +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])) + + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) - @timeit "DwdagDw" begin - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(st, U, si, m0, [1,1,1,1], lp) - end - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, st, m0, [1,1,1,1], lp) - end end - + return nothing end - function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D} b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) - # For SF: - # - cttilde affects mass term at x0 = a, T-a - # - Spinor can be periodic as long as 0 at x_0=0 + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + @inbounds begin + so[b,r] = (4+m0)*si[b,r] - for id in 1:4 - bu, ru = up((b,r), id, lp) - bd, rd = dw((b,r), id, lp) - - so[b,r] -= ( th[id]*gpmul(Pgamma{id,-1},U[b,id,r],si[bu,ru]) + - conj(th[id])*gdagpmul(Pgamma{id,+1},U[bd,id,rd],si[bd,rd]) )/2 + + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + + end + + return nothing +end + +function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + + if abs(dpar.csw) > 1.0E-10 + @timeit "Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp) + end end + else + @timeit "Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp) + end + end + end + + return nothing +end + +function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + + # The field si is assumed to be zero at t = 0 + + b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) + + if (point_time((b,r),lp) != 1) + + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + + @inbounds begin + + so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) + +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])) + + + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + + if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4]) + so[b,r] += (ct-1.0)*si[b,r] + end + end + end + + return nothing +end + +function krnl_Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + + # The field si is assumed to be zero at t = 0 + + b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) + + if (point_time((b,r),lp) != 1) + + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + + @inbounds begin + + so[b,r] = (4+m0)*si[b,r] + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + + if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4]) + so[b,r] += (ct-1.0)*si[b,r] + end + end + end + + return nothing +end + +function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} + + if abs(dpar.csw) > 1.0E-10 + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp) + end + end + else + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, lp) + end + end + end + + return nothing +end + +function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D} + + b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) + + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + + @inbounds begin + + so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) + +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])) + + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + + so[b,r] = dmul(Gamma{5}, so[b,r]) end return nothing @@ -87,21 +346,292 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D} b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + @inbounds begin + so[b,r] = (4+m0)*si[b,r] - for id in 1:4 - bu, ru = up((b,r), id, lp) - bd, rd = dw((b,r), id, lp) - - so[b,r] -= ( th[id]*gpmul(Pgamma{id,-1},U[b,id,r],si[bu,ru]) + - conj(th[id])*gdagpmul(Pgamma{id,+1},U[bd,id,rd],si[bd,rd]) )/2 - end + + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + so[b,r] = dmul(Gamma{5}, so[b,r]) end - + return nothing end -export Dw!, DwdagDw! +function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + + if abs(dpar.csw) > 1.0E-10 + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp) + end + end + else + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp) + end + end + end + + return nothing +end + +function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + + # The field si is assumed to be zero at t = 0 + + b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) + + if (point_time((b,r),lp) != 1) + + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + + @inbounds begin + + so[b,r] = (4+m0)*si[b,r] + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) + +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])) + + + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + + if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4]) + so[b,r] += (ct-1.0)*si[b,r] + end + end + end + + so[b,r] = dmul(Gamma{5}, so[b,r]) + + return nothing +end + +function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + + # The field si is assumed to be zero at t = 0 + + b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) + + if (point_time((b,r),lp) != 1) + + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) + + @inbounds begin + + so[b,r] = (4+m0)*si[b,r] + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + + if (point_time((b,r),lp) == 2) || (point_time((b,r),lp) == lp.iL[4]) + so[b,r] += (ct-1.0)*si[b,r] + end + end + end + + so[b,r] = dmul(Gamma{5}, so[b,r]) + + return nothing +end + +function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + + if abs(dpar.csw) > 1.0E-10 + @timeit "DwdagDw" begin + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp) + end + end + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp) + end + end + end + else + @timeit "DwdagDw" begin + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, dpar.ct, lp) + end + end + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, dpar.ct, lp) + end + end + end + end + + return nothing +end + +function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} + + if abs(dpar.csw) > 1.0E-10 + @timeit "DwdagDw" begin + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp) + end + end + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, lp) + end + end + end + else + @timeit "DwdagDw" begin + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp) + end + end + + @timeit "g5Dw" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp) + end + end + end + end + + return nothing +end + + +function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp) + end + + return nothing +end + +function krnl_sfbndfix!(sp,lp::SpaceParm) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + + if (point_time((b,r),lp) == 1) + sp[b,r] = 0.0*sp[b,r] + end + return nothing +end + + +""" + function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund / SU2fund {T}}}, lp::SpaceParm, t::Int64 = 0) + +Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice. +""" +function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T} + + @timeit "Randomize pseudofermion field" begin + p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4 + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t) + end + end + + return nothing +end + +function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64) + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + + if t == 0 + f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2], + x[b,2,r,1] + im* x[b,2,r,2], + x[b,3,r,1] + im* x[b,3,r,2]),p)) + elseif point_time((b,r),lp) == t + f[b,r] = Spinor(map(x->SU3fund(x[b,1,r,1] + im* x[b,1,r,2], + x[b,2,r,1] + im* x[b,2,r,2], + x[b,3,r,1] + im* x[b,3,r,2]),p)) + end + + end + + return nothing +end + +function pfrandomize!(f::AbstractArray{Spinor{4, SU2fund{T}}},lp::SpaceParm, t::Int64=0) where {T} + + @timeit "Randomize pseudofermion field" begin + p = ntuple(i->CUDA.randn(T, lp.bsz, 2, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4 + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su2!(f,p,lp,t) + end + end + + return nothing +end + +function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64) + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + + if t == 0 + f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2], + x[b,2,r,1] + im* x[b,2,r,2]),p)) + elseif point_time((b,r),lp) == t + f[b,r] = Spinor(map(x->SU2fund(x[b,1,r,1] + im* x[b,1,r,2], + x[b,2,r,1] + im* x[b,2,r,2]),p)) + end + + end + + return nothing +end + +export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize! + + +include("DiracIO.jl") +export read_prop, save_prop, read_dpar + end diff --git a/src/Dirac/DiracIO.jl b/src/Dirac/DiracIO.jl new file mode 100644 index 0000000..0c5da46 --- /dev/null +++ b/src/Dirac/DiracIO.jl @@ -0,0 +1,183 @@ + +""" + read_prop(fname::String) + +Reads propagator from file `fname` using the native (BDIO) format. +""" +function read_prop(fname::String) + + UID_HDR = 14 + fb = BDIO_open(fname, "r") + while BDIO_get_uinfo(fb) != UID_HDR + BDIO_seek!(fb) + end + ihdr = Vector{Int32}(undef, 2) + BDIO_read(fb, ihdr) + if (ihdr[1] != convert(Int32, 1753996112)) && (ihdr[2] != convert(Int32, 5)) + error("Wrong file format [header]") + end + + run = BDIO.BDIO_read_str(fb) + + while BDIO_get_uinfo(fb) != 1 + BDIO_seek!(fb) + end + + + ifoo = Vector{Int32}(undef, 2) + BDIO_read(fb, ifoo) + ndim = convert(Int64, ifoo[1]) + npls = convert(Int64, round(ndim*(ndim-1)/2)) + ibc = convert(Int64, ifoo[2]) + + ifoo = Vector{Int32}(undef, ndim+convert(Int32, npls)) + BDIO_read(fb, ifoo) + iL = ntuple(i -> convert(Int64, ifoo[i]),ndim) + ntw = ntuple(i -> convert(Int64, ifoo[i+ndim]), npls) + + foopars = Vector{Float64}(undef, 4) + BDIO_read(fb, foopars) + + footh = Vector{Float64}(undef, 4) + + lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw) + dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3]) + + + dtr = (2,3,4,1) + + while BDIO_get_uinfo(fb) != 8 + BDIO_seek!(fb) + end + + psicpu = Array{Spinor{4,SU3fund{Float64}}, 2}(undef, lp.bsz, lp.rsz) + + spvec = Vector{ComplexF64}(undef,12) + + for i4 in 1:lp.iL[4] + for i1 in 1:lp.iL[1] + for i2 in 1:lp.iL[2] + for i3 in 1:lp.iL[3] + b, r = point_index(CartesianIndex(i1,i2,i3,i4), lp) + + BDIO_read(fb, spvec) + psicpu[b,r] = Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(spvec[Int(3*i - 2)],spvec[Int(3*i - 1)] , spvec[Int(3*i)] ),4)) + + end + end + end + end + + + BDIO_close!(fb) + return CuArray(psicpu) +end + + +""" + save_prop(fname, psi, lp::SpaceParm, dpar::DiracParam; run::Union{Nothing,String}=nothing) + +Saves propagator `psi` in the file `fname` using the native (BDIO) format. +""" +function save_prop(fname::String, psi, lp::SpaceParm{4,M,B,D}, dpar::DiracParam; run::Union{Nothing,String}=nothing) where {M,B,D} + + ihdr = [convert(Int32, 1753996112),convert(Int32,5)] + UID_HDR = 14 + + if isfile(fname) + fb = BDIO_open(fname, "a") + else + fb = BDIO_open(fname, "w", "Propagator of LatticeGPU.jl") + BDIO_start_record!(fb, BDIO_BIN_GENERIC, UID_HDR) + BDIO_write!(fb, ihdr) + if run != nothing + BDIO_write!(fb, run*"\0") + end + BDIO_write_hash!(fb) + + BDIO_start_record!(fb, BDIO_BIN_GENERIC, 1) + BDIO_write!(fb, [convert(Int32, 4)]) + BDIO_write!(fb, [convert(Int32, B)]) + BDIO_write!(fb, [convert(Int32, lp.iL[i]) for i in 1:4]) + BDIO_write!(fb, [convert(Int32, lp.ntw[i]) for i in 1:M]) + BDIO_write!(fb, [dpar.m0, dpar.csw, dpar.ct]) + BDIO_write!(fb, [dpar.th[i] for i in 1:4]) + end + BDIO_write_hash!(fb) + + dtr = (2,3,4,1) + + BDIO_start_record!(fb, BDIO_BIN_F64LE, 8, true) + psicpu = Array(psi) + + for i4 in 1:lp.iL[4] + for i1 in 1:lp.iL[1] + for i2 in 1:lp.iL[2] + for i3 in 1:lp.iL[3] + b, r = point_index(CartesianIndex(i1,i2,i3,i4), lp) + for sp in 1:4 + for c in (:t1,:t2,:t3) + BDIO_write!(fb, [getfield(psicpu[b,r].s[sp],c)]) + end + end + end + end + end + end + + BDIO_write_hash!(fb) + BDIO_close!(fb) + + return nothing +end + + + +""" + read_dpar(fname::String) + +Reads Dirac parameters from file `fname` using the native (BDIO) format. Returns DiracParam and SpaceParm. +""" +function read_dpar(fname::String) + + UID_HDR = 14 + fb = BDIO_open(fname, "r") + while BDIO_get_uinfo(fb) != UID_HDR + BDIO_seek!(fb) + end + ihdr = Vector{Int32}(undef, 2) + BDIO_read(fb, ihdr) + if (ihdr[1] != convert(Int32, 1753996112)) && (ihdr[2] != convert(Int32, 5)) + error("Wrong file format [header]") + end + + run = BDIO.BDIO_read_str(fb) + + while BDIO_get_uinfo(fb) != 1 + BDIO_seek!(fb) + end + + + ifoo = Vector{Int32}(undef, 2) + BDIO_read(fb, ifoo) + ndim = convert(Int64, ifoo[1]) + npls = convert(Int64, round(ndim*(ndim-1)/2)) + ibc = convert(Int64, ifoo[2]) + + ifoo = Vector{Int32}(undef, ndim+convert(Int32, npls)) + BDIO_read(fb, ifoo) + iL = ntuple(i -> convert(Int64, ifoo[i]),ndim) + ntw = ntuple(i -> convert(Int64, ifoo[i+ndim]), npls) + + foopars = Vector{Float64}(undef, 4) + BDIO_read(fb, foopars) + + footh = Vector{Float64}(undef, 4) + + lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw) + dpar = DiracParam{Float64}(SU3fund,foopars[1],foopars[2],ntuple(i -> footh[i], 4),foopars[3]) + + + BDIO_close!(fb) + return dpar, lp +end \ No newline at end of file diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index 5f20d7d..2d9e307 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -43,7 +43,15 @@ Returns a scalar field of elemental type `T`, with lexicografic memory layout. """ scalar_field_point(::Type{T}, lp::SpaceParm{N,M,D}) where {T,N,M,D} = CuArray{T, N}(undef, lp.iL...) -export vector_field, scalar_field, nscalar_field, scalar_field_point +""" + tensor_field(::Type{T}, lp::SpaceParm) + +Returns a tensor field of elemental type `T`. +""" +tensor_field(::Type{T}, lp::SpaceParm) where {T} = CuArray{T, 3}(undef, lp.bsz, lp.npls, lp.rsz) + + +export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field end diff --git a/src/Groups/AlgebraU3.jl b/src/Groups/AlgebraU3.jl new file mode 100644 index 0000000..fa03579 --- /dev/null +++ b/src/Groups/AlgebraU3.jl @@ -0,0 +1,33 @@ + + + +struct U3alg{T} <: Algebra + u11::T + u22::T + u33::T + u12::Complex{T} + u13::Complex{T} + u23::Complex{T} +end + +function antsym(a::SU3{T}) where T <: AbstractFloat + t1 = 2.0*imag(a.u11) + t2 = 2.0*imag(a.u22) + t3 = 2.0*imag(a.u12*a.u21 - a.u11*a.u22) + t4 = a.u12 - conj(a.u21) + t5 = a.u13 - (a.u12*a.u23 - a.u13*a.u22) + t6 = a.u23 - (a.u13*a.u21 - a.u11*a.u23) + + return U3alg{T}(t1,t2,t3,t4,t5,t6) +end + + +Base.:*(a::U3alg{T},b::SU3fund{T}) where T <: AbstractFloat = SU3fund{T}(im*a.u11*b.t1 + a.u12*b.t2 + a.u13*b.t3, + -conj(a.u12)*b.t1 + im*a.u22*b.t2 + a.u23*b.t3, + -conj(a.u13)*b.t1 - conj(a.u23)*b.t2 + im*a.u33*b.t3) + +Base.:+(a::U3alg{T},b::U3alg{T}) where T <: AbstractFloat = U3alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u33 + b.u33, a.u12 + b.u12, a.u13 + b.u13, a.u23 + b.u23) + + +Base.:*(r::Number, a::U3alg{T}) where T <: AbstractFloat = U3alg{T}(r*a.u11, r*a.u22, r*a.u33, r*a.u12, r*a.u13, r*a.u23) + diff --git a/src/Groups/FundamentalSU3.jl b/src/Groups/FundamentalSU3.jl index 987a5ce..d429d5d 100644 --- a/src/Groups/FundamentalSU3.jl +++ b/src/Groups/FundamentalSU3.jl @@ -24,14 +24,14 @@ dag(a::SU3fund{T}) where T <: AbstractFloat = SU3fund{T}(conj(a. Returns the norm of a fundamental element. Same result as dot(a,a). """ -norm(a::SU3fund{T}) where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2) + abs2(a.t1))) +norm(a::SU3fund{T}) where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2) + abs2(a.t3))) """ norm(a::SU3fund{T}) Returns the norm of a fundamental element. Same result as sqrt(dot(a,a)). """ -norm2(a::SU3fund{T}) where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2) + abs2(a.t1)) +norm2(a::SU3fund{T}) where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2) + abs2(a.t3)) """ dot(a::SU3fund{T},b::SU3fund{T}) diff --git a/src/Groups/Groups.jl b/src/Groups/Groups.jl index 34ee296..3ed40ca 100644 --- a/src/Groups/Groups.jl +++ b/src/Groups/Groups.jl @@ -74,11 +74,14 @@ include("FundamentalSU3.jl") export imm, mimm ## END SU(3) +include("AlgebraU3.jl") +export U3alg + include("GroupU1.jl") export U1, U1alg -export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one +export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym end # module diff --git a/src/LatticeGPU.jl b/src/LatticeGPU.jl index 99306dd..bb640f2 100644 --- a/src/LatticeGPU.jl +++ b/src/LatticeGPU.jl @@ -16,8 +16,8 @@ include("Groups/Groups.jl") using .Groups export Group, Algebra, GMatrix -export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund -export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one +export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg +export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym include("Space/Space.jl") @@ -28,7 +28,7 @@ export BC_PERIODIC, BC_OPEN, BC_SF_AFWB, BC_SF_ORBI include("Fields/Fields.jl") using .Fields -export vector_field, scalar_field, nscalar_field, scalar_field_point +export vector_field, scalar_field, nscalar_field, scalar_field_point, tensor_field include("MD/MD.jl") using .MD @@ -56,12 +56,13 @@ export pmul, gpmul, gdagpmul, dmul include("Dirac/Dirac.jl") using .Dirac -export DiracWorkspace -export Dw!, DwdagDw! - +export DiracWorkspace, DiracParam +export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize! +export read_prop, save_prop, read_dpar include("Solvers/Solvers.jl") using .Solvers export CG! +export propagator!, bndpropagator!, Tbndpropagator!, bndtobnd end # module diff --git a/src/Solvers/CG.jl b/src/Solvers/CG.jl index 4d58916..8e86c7b 100644 --- a/src/Solvers/CG.jl +++ b/src/Solvers/CG.jl @@ -14,21 +14,43 @@ Solves the linear equation `Ax = si` """ -function CG!(si, U, m0, A, lp::SpaceParm, dws::DiracWorkspace) +function krnl_dot!(sum,fone,ftwo) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + + sum[b,r] = dot(fone[b,r],ftwo[b,r]) + +return nothing +end + +function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp) where {T} + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(sumf,fone,ftwo) + end + + return sum(sumf) +end + +function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0) where {T} dws.sr .= si dws.sp .= si norm = CUDA.mapreduce(x -> norm2(x), +, si) - fill!(si,zero(eltype(so))) + fill!(si,zero(eltype(si))) err = 0.0 - - tol = eps * norm + + tol = tol * norm + iterations = 0 + sumf = scalar_field(Complex{T}, lp) niter = 0 for i in 1:maxiter - A(dws.sAp, U, dws.sp, am0, dws.st, lp) - prod = CUDA.mapreduce(x -> dot(x[1],x[2]), +, zip(dws.sp, dws.sAp)) + A(dws.sAp, U, dws.sp, dpar, dws, lp) + + prod = field_dot(dws.sp,dws.sAp,sumf,lp) + alpha = norm/prod si .= si .+ alpha .* dws.sp @@ -52,4 +74,4 @@ function CG!(si, U, m0, A, lp::SpaceParm, dws::DiracWorkspace) end return niter -end +end \ No newline at end of file diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl new file mode 100644 index 0000000..dc42f5b --- /dev/null +++ b/src/Solvers/Propagators.jl @@ -0,0 +1,177 @@ + + + +""" + function propagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64) + +Saves the fermionic progapator in pro for a source at point `y` with color `c` and spin `s`. If the last three arguments are replaced by `time::Int64`, the source is replaced +by a random source in spin and color at t = `time`. + +""" +function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, y::NTuple{4,Int64}, c::Int64, s::Int64) where {T} + + function krnlg5!(src) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + src[b,r] = dmul(Gamma{5},src[b,r]) + return nothing + end + + fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp)))) + + CUDA.@allowscalar dws.sp[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)) + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp) + end + + g5Dw!(pro,U,dws.sp,dpar,dws,lp) + + CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol) + return nothing +end + +function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm, maxiter::Int64, tol::Float64, time::Int64) where {T} + + function krnlg5!(src) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + src[b,r] = dmul(Gamma{5},src[b,r]) + return nothing + end + + pfrandomize!(dws.sp,lp,time) + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp) + end + + g5Dw!(pro,U,dws.sp,dpar,dws,lp) + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp) + end + + CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol) + return nothing +end + +""" + + function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) + +Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not. +For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) + +""" +function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D} + + function krnlg5!(src) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + src[b,r] = dmul(Gamma{5},src[b,r]) + return nothing + end + + function krnl_assign_bndsrc!(src,U,lp::SpaceParm, c::Int64, s::Int64) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + + if (point_time((b,r),lp) == 2) + bd4, rd4 = dw((b,r), 4, lp) + src[b,r] = gdagpmul(Pgamma{4,1},U[bd4,4,rd4],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2 + end + + return nothing + end + + fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp)))) + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s) + end + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp) + end + + g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp) + + CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol) + return pro +end + +""" + + function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) + +Returns the propagator from the t=T boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not. +For the propagator from t=0 to the bulk, use the function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) + +""" +function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) where {T,D} + + function krnlg5!(src) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + src[b,r] = dmul(Gamma{5},src[b,r]) + return nothing + end + + function krnl_assign_bndsrc!(src,U,lp::SpaceParm, c::Int64, s::Int64) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + + if (point_time((b,r),lp) == lp.iL[end]) + src[b,r] = gpmul(Pgamma{4,-1},U[b,4,r],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2 + end + + return nothing + end + + fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp)))) + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s) + end + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp) + end + + g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp) + + CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol) + return pro +end + + +""" + function bndtobnd(bndpro, U, dpar, dws, lp) + +Returns the boundary to boundary propagator of the Schrodinger functional given that bndpro is the propagator from t = 0 to the bulk, given by the function bndpropagator!. + +""" +function bndtobnd(bndpro::AbstractArray, U::AbstractArray, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}) where {D} + + function krnl_bndtobnd!(psi::AbstractArray, bndp::AbstractArray, U::AbstractArray, lp::SpaceParm) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + + if point_time((b, r), lp) == lp.iL[end] + psi[b,r] = gdagpmul(Pgamma{4,1},U[b,4,r],bndpro[b,r])/2 + else + psi[b,r] = 0.0*bndpro[b,r] + end + + return nothing + end + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_bndtobnd!(dws.sp, bndpro ,U, lp) + end + + res = -dpar.ct * sum(dws.sp) / (lp.iL[1]*lp.iL[2]*lp.iL[3]) + + return res +end diff --git a/src/Solvers/Solvers.jl b/src/Solvers/Solvers.jl index 900163b..edcd5ae 100644 --- a/src/Solvers/Solvers.jl +++ b/src/Solvers/Solvers.jl @@ -22,7 +22,8 @@ using ..Dirac include("CG.jl") export CG! - +include("Propagators.jl") +export propagator!, bndpropagator!, Tbndpropagator!, bndtobnd end diff --git a/src/Spinors/Spinors.jl b/src/Spinors/Spinors.jl index ffc934b..6b8a46b 100644 --- a/src/Spinors/Spinors.jl +++ b/src/Spinors/Spinors.jl @@ -50,7 +50,7 @@ Returns the scalar product of two spinors. sum = :(dot(a.s[1],b.s[1])) for i in 2:NS - sum = :($sum + norm2(a.s[$i])) + sum = :($sum + dot(a.s[$i],b.s[$i])) end return :($sum) @@ -64,6 +64,21 @@ Returns ga """ Base.:*(g::S,b::Spinor{NS,G}) where {S <: Group,NS,G} = Spinor{NS,G}(ntuple(i->g*b.s[i], NS)) +""" + *(a::SU3alg{T},b::Spinor) + +Returns ab +""" +Base.:*(a::S,b::Spinor{NS,G}) where {S <: Algebra,NS,G} = Spinor{NS,G}(ntuple(i->a*b.s[i], NS)) + +""" + *(a::M3x3{T},b::Spinor) + +Returns ab +""" +Base.:*(a::S,b::Spinor{NS,G}) where {S <: GMatrix,NS,G} = Spinor{NS,G}(ntuple(i->a*b.s[i], NS)) + + """ \\(g::SU3{T},b::Spinor{NS,G}) @@ -75,9 +90,9 @@ Base.:\(g::S,b::Spinor{NS,G}) where {S <: Group,NS,G} = Spinor{NS,G}(ntuple(i->g Base.:+(a::Spinor{NS,G},b::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->a.s[i]+b.s[i], NS)) Base.:-(a::Spinor{NS,G},b::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->a.s[i]-b.s[i], NS)) Base.:+(a::Spinor{NS,G}) where {NS,G} = a -Base.:-(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->-b.s[i], NS)) -imm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->imm(b.s[i]), NS)) -mimm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->mimm(b.s[i]), NS)) +Base.:-(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->-a.s[i], NS)) +imm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->imm(a.s[i]), NS)) +mimm(a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(ntuple(i->mimm(a.s[i]), NS)) # Operations with numbers @@ -100,56 +115,56 @@ end Returns ``(1+s\\gamma_N)a``. """ -@inline function pmul(::Type{Pgamma{4,1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{4,-1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]+a.s[3] r2 = a.s[2]+a.s[4] - return Spinor{4,G}((r1,r2,r1,r2)) + return Spinor{NS,G}((r1,r2,r1,r2)) end -@inline function pmul(::Type{Pgamma{4,-1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{4,1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]-a.s[3] r2 = a.s[2]-a.s[4] - return Spinor{4,G}((r1,r2,-r1,-r2)) + return Spinor{NS,G}((r1,r2,-r1,-r2)) end -@inline function pmul(::Type{Pgamma{1,1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{1,-1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]+imm(a.s[4]) r2 = a.s[2]+imm(a.s[3]) - return Spinor{4,G}((r1,r2,mimm(r2),mimm(r1))) + return Spinor{NS,G}((r1,r2,mimm(r2),mimm(r1))) end -@inline function pmul(::Type{Pgamma{1,-1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{1,1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]-imm(a.s[4]) r2 = a.s[2]-imm(a.s[3]) - return Spinor{4,G}((r1,r2,imm(r2),imm(r1))) + return Spinor{NS,G}((r1,r2,imm(r2),imm(r1))) end -@inline function pmul(::Type{Pgamma{2,1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{2,-1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]+a.s[4] r2 = a.s[2]-a.s[3] - return Spinor{4,G}((r1,r2,-r2,r1)) + return Spinor{NS,G}((r1,r2,-r2,r1)) end -@inline function pmul(::Type{Pgamma{2,-1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{2,1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]-a.s[4] r2 = a.s[2]+a.s[3] - return Spinor{4,G}((r1,r2,r2,-r1)) + return Spinor{NS,G}((r1,r2,r2,-r1)) end -@inline function pmul(::Type{Pgamma{3,1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{3,-1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]+imm(a.s[3]) r2 = a.s[2]-imm(a.s[4]) - return Spinor{4,G}((r1,r2,mimm(r1),imm(r2))) + return Spinor{NS,G}((r1,r2,mimm(r1),imm(r2))) end -@inline function pmul(::Type{Pgamma{3,-1}}, a::Spinor{4,G}) where {NS,G} +@inline function pmul(::Type{Pgamma{3,1}}, a::Spinor{NS,G}) where {NS,G} r1 = a.s[1]-imm(a.s[3]) r2 = a.s[2]+imm(a.s[4]) - return Spinor{4,G}((r1,r2,imm(r1),mimm(r2))) + return Spinor{NS,G}((r1,r2,imm(r1),mimm(r2))) end @@ -158,56 +173,56 @@ end Returns ``g(1+s\\gamma_N)a`` """ -@inline function gpmul(::Type{Pgamma{4,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]+a.s[3]) r2 = g*(a.s[2]+a.s[4]) - return Spinor{4,G}((r1,r2,r1,r2)) + return Spinor{NS,G}((r1,r2,r1,r2)) end -@inline function gpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{4,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]-a.s[3]) r2 = g*(a.s[2]-a.s[4]) - return Spinor{4,G}((r1,r2,-r1,-r2)) + return Spinor{NS,G}((r1,r2,-r1,-r2)) end -@inline function gpmul(::Type{Pgamma{1,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]+imm(a.s[4])) r2 = g*(a.s[2]+imm(a.s[3])) - return Spinor{4,G}((r1,r2,mimm(r2),mimm(r1))) + return Spinor{NS,G}((r1,r2,mimm(r2),mimm(r1))) end -@inline function gpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{1,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]-imm(a.s[4])) r2 = g*(a.s[2]-imm(a.s[3])) - return Spinor{4,G}((r1,r2,imm(r2),imm(r1))) + return Spinor{NS,G}((r1,r2,imm(r2),imm(r1))) end -@inline function gpmul(::Type{Pgamma{2,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]+a.s[4]) r2 = g*(a.s[2]-a.s[3]) - return Spinor{4,G}((r1,r2,-r2,r1)) + return Spinor{NS,G}((r1,r2,-r2,r1)) end -@inline function gpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{2,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]-a.s[4]) r2 = g*(a.s[2]+a.s[3]) - return Spinor{4,G}((r1,r2,r2,-r1)) + return Spinor{NS,G}((r1,r2,r2,-r1)) end -@inline function gpmul(::Type{Pgamma{3,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]+imm(a.s[3])) r2 = g*(a.s[2]-imm(a.s[4])) - return Spinor{4,G}((r1,r2,mimm(r1),imm(r2))) + return Spinor{NS,G}((r1,r2,mimm(r1),imm(r2))) end -@inline function gpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gpmul(::Type{Pgamma{3,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g*(a.s[1]-imm(a.s[3])) r2 = g*(a.s[2]+imm(a.s[4])) - return Spinor{4,G}((r1,r2,imm(r1),mimm(r2))) + return Spinor{NS,G}((r1,r2,imm(r1),mimm(r2))) end """ @@ -215,56 +230,56 @@ end Returns ``g^+ (1+s\\gamma_N)a`` """ -@inline function gdagpmul(::Type{Pgamma{4,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]+a.s[3]) r2 = g\(a.s[2]+a.s[4]) - return Spinor{4,G}((r1,r2,r1,r2)) + return Spinor{NS,G}((r1,r2,r1,r2)) end -@inline function gdagpmul(::Type{Pgamma{4,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{4,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]-a.s[3]) r2 = g\(a.s[2]-a.s[4]) - return Spinor{4,G}((r1,r2,-r1,-r2)) + return Spinor{NS,G}((r1,r2,-r1,-r2)) end -@inline function gdagpmul(::Type{Pgamma{1,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]+imm(a.s[4])) r2 = g\(a.s[2]+imm(a.s[3])) - return Spinor{4,G}((r1,r2,mimm(r2),mimm(r1))) + return Spinor{NS,G}((r1,r2,mimm(r2),mimm(r1))) end -@inline function gdagpmul(::Type{Pgamma{1,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{1,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]-imm(a.s[4])) r2 = g\(a.s[2]-imm(a.s[3])) - return Spinor{4,G}((r1,r2,imm(r2),imm(r1))) + return Spinor{NS,G}((r1,r2,imm(r2),imm(r1))) end -@inline function gdagpmul(::Type{Pgamma{2,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]+a.s[4]) r2 = g\(a.s[2]-a.s[3]) - return Spinor{4,G}((r1,r2,-r2,r1)) + return Spinor{NS,G}((r1,r2,-r2,r1)) end -@inline function gdagpmul(::Type{Pgamma{2,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{2,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]-a.s[4]) r2 = g\(a.s[2]+a.s[3]) - return Spinor{4,G}((r1,r2,r2,-r1)) + return Spinor{NS,G}((r1,r2,r2,-r1)) end -@inline function gdagpmul(::Type{Pgamma{3,1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]+imm(a.s[3])) r2 = g\(a.s[2]-imm(a.s[4])) - return Spinor{4,G}((r1,r2,mimm(r1),imm(r2))) + return Spinor{NS,G}((r1,r2,mimm(r1),imm(r2))) end -@inline function gdagpmul(::Type{Pgamma{3,-1}}, g, a::Spinor{4,G}) where {NS,G} +@inline function gdagpmul(::Type{Pgamma{3,1}}, g, a::Spinor{NS,G}) where {NS,G} r1 = g\(a.s[1]-imm(a.s[3])) r2 = g\(a.s[2]+imm(a.s[4])) - return Spinor{4,G}((r1,r2,imm(r1),mimm(r2))) + return Spinor{NS,G}((r1,r2,imm(r1),mimm(r2))) end @@ -292,30 +307,30 @@ indexing for Dirac basis ``\\Gamma_n``: 10 sigma01 11 sigma02 12 sigma03 -13 sigma12 -14 sigma23 +13 sigma21 +14 sigma32 15 sigma31 16 identity """ -@inline dmul(::Type{Gamma{1}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(mimm(a.s[4]), mimm(a.s[3]), imm(a.s[2]), imm(a.s[1])) -@inline dmul(::Type{Gamma{2}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[4], a.s[3], a.s[2], -a.s[1]) -@inline dmul(::Type{Gamma{3}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(mimm(a.s[3]), imm(a.s[4]), imm(a.s[1]), mimm(a.s[2])) -@inline dmul(::Type{Gamma{4}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[3], -a.s[4], -a.s[1], -a.s[2]) -@inline dmul(::Type{Gamma{5}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[1], a.s[2], -a.s[3], -a.s[4]) -@inline dmul(::Type{Gamma{6}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( imm(a.s[4]), imm(a.s[3]), imm(a.s[2]), imm(a.s[1])) -@inline dmul(::Type{Gamma{7}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[4], -a.s[3], a.s[2], -a.s[1]) -@inline dmul(::Type{Gamma{8}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( imm(a.s[3]), mimm(a.s[4]), imm(a.s[1]), mimm(a.s[2])) -@inline dmul(::Type{Gamma{9}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[3], a.s[4], -a.s[1], -a.s[2]) -@inline dmul(::Type{Gamma{10}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[2], a.s[1], -a.s[4], -a.s[3]) -@inline dmul(::Type{Gamma{11}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(mimm(a.s[2]), imm(a.s[1]), imm(a.s[4]), mimm(a.s[3])) -@inline dmul(::Type{Gamma{12}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( a.s[1], -a.s[2], -a.s[3], a.s[4]) -@inline dmul(::Type{Gamma{13}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[1], a.s[2], -a.s[3], a.s[4]) -@inline dmul(::Type{Gamma{14}}, a::Spinor{4,G}) where {G} = Spinor{4,G}(-a.s[2], -a.s[1], -a.s[4], -a.s[3]) -@inline dmul(::Type{Gamma{15}}, a::Spinor{4,G}) where {G} = Spinor{4,G}( imm(a.s[2]), mimm(a.s[1]), imm(a.s[4]), mimm(a.s[3])) -@inline dmul(::Type{Gamma{16}}, a::Spinor{4,G}) where {G} = a +@inline dmul(::Type{Gamma{1}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[4]), mimm(a.s[3]), imm(a.s[2]), imm(a.s[1]))) +@inline dmul(::Type{Gamma{2}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((-a.s[4], a.s[3], a.s[2], -a.s[1])) +@inline dmul(::Type{Gamma{3}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[3]), imm(a.s[4]), imm(a.s[1]), mimm(a.s[2]))) +@inline dmul(::Type{Gamma{4}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((-a.s[3], -a.s[4], -a.s[1], -a.s[2])) +@inline dmul(::Type{Gamma{5}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((a.s[1], a.s[2], -a.s[3], -a.s[4])) +@inline dmul(::Type{Gamma{6}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( imm(a.s[4]), imm(a.s[3]), imm(a.s[2]), imm(a.s[1]))) +@inline dmul(::Type{Gamma{7}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[4], -a.s[3], a.s[2], -a.s[1])) +@inline dmul(::Type{Gamma{8}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( imm(a.s[3]), mimm(a.s[4]), imm(a.s[1]), mimm(a.s[2]))) +@inline dmul(::Type{Gamma{9}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[3], a.s[4], -a.s[1], -a.s[2])) +@inline dmul(::Type{Gamma{10}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[2], a.s[1], -a.s[4], -a.s[3])) +@inline dmul(::Type{Gamma{11}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[2]), imm(a.s[1]), imm(a.s[4]), mimm(a.s[3]))) +@inline dmul(::Type{Gamma{12}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}(( a.s[1], -a.s[2], -a.s[3], a.s[4])) +@inline dmul(::Type{Gamma{13}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((a.s[1], -a.s[2], a.s[3], -a.s[4])) +@inline dmul(::Type{Gamma{14}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((a.s[2], a.s[1], a.s[4], a.s[3])) +@inline dmul(::Type{Gamma{15}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((imm(a.s[2]), mimm(a.s[1]), imm(a.s[4]), mimm(a.s[3]))) +@inline dmul(::Type{Gamma{16}}, a::Spinor{NS,G}) where {NS,G} = a -export Spinor, Pgamma +export Spinor, Pgamma, Gamma export norm, norm2, dot, imm, mimm export pmul, gpmul, gdagpmul, dmul diff --git a/src/YM/YMio.jl b/src/YM/YMio.jl index aaca3d0..41e6132 100644 --- a/src/YM/YMio.jl +++ b/src/YM/YMio.jl @@ -157,7 +157,7 @@ function save_cnfg(fname::String, U, lp::SpaceParm{4,M,B,D}, gp::GaugeParm; run: if B == BC_SF_AFWB || B == BC_SF_ORBI for i3 in 1:lp.iL[3] for id in 1:lp.ndim-1 - assign!(id, V, i3, Ubnd[id]) + assign!(id, V, i3, gp.Ubnd[id]) end assign!(4, V, i3, zero(M3x3{Float64})) end diff --git a/test/dirac/test_fp_fa.jl b/test/dirac/test_fp_fa.jl new file mode 100644 index 0000000..ee931ae --- /dev/null +++ b/test/dirac/test_fp_fa.jl @@ -0,0 +1,125 @@ +using LatticeGPU +using CUDA +using TimerOutputs + + +@timeit "fA_fP test" begin + + +function fP_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16) + +@timeit "fP inversion (x12)" begin + +lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0)); +exptheta = exp.(im.*theta./lp.iL); + +dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0); +dws = DiracWorkspace(SU3fund{Float64},Float64,lp); + +U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64})); +psi = scalar_field(Spinor{4,SU3fund{Float64}},lp); + +res = zeros(lp.iL[4]) + +for s in 1:4 for c in 1:3 + bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s); + + for t in 1:lp.iL[4] + #for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3] + i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1; + CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2) + #end end end + #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3]) + + end + +end end + +end + +@timeit "fP analitical solution" begin + + #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33) + + res_th = zeros(lp.iL[4]) + + pp3 = ntuple(i -> theta[i]/lp.iL[i],3) + omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) ) + pp = (-im*omega,pp3...) + Mpp = m + 2* sum((sin.(pp./2)).^2) + Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4])) + + for i in 2:lp.iL[4] + res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) ) + end + +end + return sum(abs.(res-res_th)) + +end + +function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16) + +@timeit "fA inversion (x12)" begin + + lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0)); + exptheta = exp.(im.*theta./lp.iL); + + dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0); + dws = DiracWorkspace(SU3fund{Float64},Float64,lp); + + U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64})); + psi = scalar_field(Spinor{4,SU3fund{Float64}},lp); + + res = im*zeros(lp.iL[4]) + + for s in 1:4 for c in 1:3 + bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s); + + for t in 1:lp.iL[4] + #for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3] + i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1; + CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2) + #end end end + #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3]) + + end + + end end + +end + #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32) + +@timeit "fA analitical solution" begin + res_th = zeros(lp.iL[4]) + + pp3 = ntuple(i -> theta[i]/lp.iL[i],3) + omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) ) + pp = (-im*omega,pp3...) + Mpp = m + 2* sum((sin.(pp./2)).^2) + Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4])) + + for i in 2:lp.iL[4] + res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4]) + - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))))) + end + +end + + return sum(abs.(res-res_th)) + +end + + +difA = fA_test(); +difP = fP_test(); + +if difA > 1.0e-15 + error("fA test failed with error ", difA) +elseif difP > 1.0e-15 + error("fP test failed with error ", difP) +else + print("fA & fP tests passed with errors: ", difA," and ",difP,"!\n") +end + +end diff --git a/test/dirac/test_solver_plw.jl b/test/dirac/test_solver_plw.jl new file mode 100644 index 0000000..14100d0 --- /dev/null +++ b/test/dirac/test_solver_plw.jl @@ -0,0 +1,112 @@ +using LatticeGPU, CUDA, TimerOutputs + +#Test for the relation Dw(n|m)^{-1} e^(ipm) = D(p)^{-1} e^{ipn} with a given momenta (if p=0 its randomized), spin and color + +@timeit "Plw test" begin + +function Dwpw_test(;p=0,s=1,c=1) +lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0)) +gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0) +dpar = DiracParam{Float64}(SU3fund,1.3,0.0,(1.0,1.0,1.0,1.0),0.0) +dws = DiracWorkspace(SU3fund{Float64},Float64,lp); + +p==0 ? p = Int.(round.(lp.iL.*rand(4),RoundUp)) : nothing +U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64})) + +rm = 2* pi* p./(lp.iL) +rmom=(rm[1],rm[2],rm[3],rm[4]) + +@timeit "Generate plane wave" begin + +pwave = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp)))) +prop = scalar_field(Spinor{4,SU3fund{Float64}},lp) +prop_th = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp)))) + + +#Generate plane wave + +for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4] +CUDA.@allowscalar pwave[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)) +end end end end + +end + +@timeit "Generate analitical solution" begin + +#Th solution + +if s == 1 + vals = (dpar.m0 + 4.0 - sum(cos.(rmom)),0.0,im*sin(rmom[4])+sin(rmom[3]),im*sin(rmom[2])+sin(rmom[1])) + for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4] + CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))* + ( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2) + end end end end +elseif s == 2 + vals = (0.0,dpar.m0 + 4.0 - sum(cos.(rmom)),sin(rmom[1]) - im *sin(rmom[2]),-sin(rmom[3])+im*sin(rmom[4])) + for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4] + CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))* + ( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2) + end end end end +elseif s == 3 + vals = (-sin(rmom[3])+im*sin(rmom[4]),-sin(rmom[1])-im*sin(rmom[2]),dpar.m0 + 4.0 - sum(cos.(rmom)),0.0) + for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4] + CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))* + ( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2) + end end end end +else + vals = (-sin(rmom[1])+im*sin(rmom[2]),sin(rmom[3])+im*sin(rmom[4]),0.0,dpar.m0 + 4.0 - sum(cos.(rmom))) + for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4] + CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))* + ( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2) + end end end end +end + +end + + +#compute Sum{x} D^-1(x|y)P(y) + +@timeit "Solving propagator" begin + +function krnlg5!(src) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + src[b,r] = dmul(Gamma{5},src[b,r]) + return nothing +end + +CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(pwave) +end +g5Dw!(prop,U,pwave,dpar,dws,lp) +CG!(prop,U,DwdagDw!,dpar,lp,dws,10000,1.0e-14) +end + +dif = sum(norm2.(prop - prop_th)) + +if dif > 1.0e-15 + error("Dwpl test for s=",s,", c=",c," failed with difference: ",dif,"\n") +end + + +return dif +end + + + + +begin +dif = 0.0 +for i in 1:3 for j in 1:4 + dif += Dwpw_test(c=i,s=j) +end end + +if dif < 1.0e-15 + print("Dwpl test passed with average error ", dif/12,"!\n") +else + error("Dwpl test failed with difference: ",dif,"\n") +end + + +end +end diff --git a/test/dirac/test_solver_rand.jl b/test/dirac/test_solver_rand.jl new file mode 100644 index 0000000..91a477d --- /dev/null +++ b/test/dirac/test_solver_rand.jl @@ -0,0 +1,54 @@ +using CUDA, LatticeGPU, TimerOutputs + +#Check that Dw ( (DwdagDw)^{-1} g5 Dw g5 ) psi = psi for random fields + +@timeit "Rand solver test" begin + +@timeit "Generate random fields" begin + + lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0)) + gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0) + ymws = YMworkspace(SU3, Float64, lp) + dpar = DiracParam{Float64}(SU3fund,2.3,0.0,(1.0,1.0,1.0,1.0),0.0) + dws = DiracWorkspace(SU3fund{Float64},Float64,lp); + + randomize!(ymws.mom, lp, ymws) + U = exp.(ymws.mom) + + rpsi = scalar_field(Spinor{4,SU3fund{Float64}},lp) + pfrandomize!(rpsi,lp) + +end + +prop = scalar_field(Spinor{4,SU3fund{Float64}},lp) + +function krnlg5!(src) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + src[b,r] = dmul(Gamma{5},src[b,r]) + return nothing +end + +CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(rpsi) +end +g5Dw!(prop,U,rpsi,dpar,dws,lp) +CG!(prop,U,DwdagDw!,dpar,lp,dws,10000,1.0e-14) + +Dw!(dws.sp,U,prop,dpar,dws,lp) + +CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(rpsi) +end + +res = sum(norm2.(rpsi-dws.sp)) + + +if res < 1.0e-6 + print("Drand test passed with ",res,"% error!\n") + +else + error("Drand test failed with difference: ",res,"\n") +end + +end