diff --git a/docs/src/dirac.md b/docs/src/dirac.md index 30c59a9..c664063 100644 --- a/docs/src/dirac.md +++ b/docs/src/dirac.md @@ -63,6 +63,11 @@ in the first time-slice is zero. To enforce this, we have the function SF_bndfix! ``` +Note that this is not enforced in the Dirac operators, so if the field `so` does not satisfy SF +boundary conditions, it will not (in general) satisfy them after applying [`Dw!`](@ref) +or [`g5Dw!`](@ref). This function is called for the function [`DwdagDw!`](@ref), so in this case +`so` will always be a proper SF field after calling this function. + The function [`Csw!`](@ref) is used to store the clover in `dws.csw`. It is computed according to the expression diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl index b57fa6c..a6ff103 100644 --- a/src/Dirac/Dirac.jl +++ b/src/Dirac/Dirac.jl @@ -28,7 +28,7 @@ The parameters are: - `m0::T` : Mass of the fermion - `csw::T` : Improvement coefficient for the Csw term - `th{Ntuple{4,Complex{T}}}` : Phase for the fermions included in the boundary conditions, reabsorbed in the Dirac operator. -- `tm` : Twisted mass paramete +- `tm` : Twisted mass parameter - `ct` : Boundary improvement term, only used for Schrödinger Funtional boundary conditions. """ struct DiracParam{T,R} @@ -534,12 +534,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp) end end - + SF_bndfix!(dws.st,lp) @timeit "g5Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp) end end + SF_bndfix!(so,lp) end else @timeit "DwdagDw" begin @@ -549,12 +550,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp) end end - + SF_bndfix!(dws.st,lp) @timeit "g5Dw" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp) end end + SF_bndfix!(so,lp) end end @@ -604,10 +606,11 @@ end Sets all the values of `sp` in the first time slice to zero. """ function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp) + @timeit "SF boundary fix" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp) + end end - return nothing end diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl index 73ebcce..99cf462 100644 --- a/src/Solvers/Propagators.jl +++ b/src/Solvers/Propagators.jl @@ -56,7 +56,7 @@ end function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) -Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not. +Saves the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's' in 'pro'. The factor c_t is included while the factor 1/sqrt(V) is not. For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64) """ @@ -81,6 +81,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp return nothing end + SF_bndfix!(pro,lp) fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp)))) CUDA.@sync begin @@ -94,7 +95,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp) CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol) - return pro + return nothing end """ @@ -124,7 +125,8 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S return nothing end - + + SF_bndfix!(pro,lp) fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp)))) CUDA.@sync begin @@ -138,7 +140,7 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp) CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol) - return pro + return nothing end diff --git a/test/dirac/test_fp_fa.jl b/test/dirac/test_fp_fa.jl index ea49897..adaf3eb 100644 --- a/test/dirac/test_fp_fa.jl +++ b/test/dirac/test_fp_fa.jl @@ -8,106 +8,106 @@ using TimerOutputs function fP_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16) -@timeit "fP inversion (x12)" begin + @timeit "fP inversion (x12)" begin -lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0)); -exptheta = exp.(im.*theta./lp.iL); + lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0)); + exptheta = exp.(im.*theta./lp.iL); -dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0); -dws = DiracWorkspace(SU3fund,Float64,lp); + dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0); + dws = DiracWorkspace(SU3fund,Float64,lp); -U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64})); -psi = scalar_field(Spinor{4,SU3fund{Float64}},lp); + U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64})); + psi = scalar_field(Spinor{4,SU3fund{Float64}},lp); -res = zeros(lp.iL[4]) + res = zeros(lp.iL[4]) -for s in 1:4 for c in 1:3 - bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s); + for s in 1:4 for c in 1:3 + bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s); - for t in 1:lp.iL[4] - #for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3] - i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1; - CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2) - #end end end - #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3]) + for t in 1:lp.iL[4] + #for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3] + i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1; + CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2) + #end end end + #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3]) + + end + + end end end -end end + @timeit "fP analitical solution" begin -end + #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33) -@timeit "fP analitical solution" begin + res_th = zeros(lp.iL[4]) - #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33) + pp3 = ntuple(i -> theta[i]/lp.iL[i],3) + omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) ) + pp = (-im*omega,pp3...) + Mpp = m + 2* sum((sin.(pp./2)).^2) + Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4])) - res_th = zeros(lp.iL[4]) + for i in 2:lp.iL[4] + res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) ) + end - pp3 = ntuple(i -> theta[i]/lp.iL[i],3) - omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) ) - pp = (-im*omega,pp3...) - Mpp = m + 2* sum((sin.(pp./2)).^2) - Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4])) - - for i in 2:lp.iL[4] - res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) ) end - -end return sum(abs.(res-res_th)) end function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16) -@timeit "fA inversion (x12)" begin + @timeit "fA inversion (x12)" begin + + lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0)); + exptheta = exp.(im.*theta./lp.iL); + + dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0); + dws = DiracWorkspace(SU3fund,Float64,lp); + + U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64})); + psi = scalar_field(Spinor{4,SU3fund{Float64}},lp); + + res = im*zeros(lp.iL[4]) + + for s in 1:4 for c in 1:3 + bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s); + + for t in 1:lp.iL[4] + #for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3] + i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1; + CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2) + #end end end + #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3]) + + end + + end end - lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0)); - exptheta = exp.(im.*theta./lp.iL); - - dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0); - dws = DiracWorkspace(SU3fund,Float64,lp); - - U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64})); - psi = scalar_field(Spinor{4,SU3fund{Float64}},lp); - - res = im*zeros(lp.iL[4]) - - for s in 1:4 for c in 1:3 - bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s); - - for t in 1:lp.iL[4] - #for i in 1:lp.iL[1] for j in 1:lp.iL[2] for k in 1:lp.iL[3] - i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1; - CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2) - #end end end - #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3]) - - end - - end end - -end - #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32) - -@timeit "fA analitical solution" begin - res_th = zeros(lp.iL[4]) - - pp3 = ntuple(i -> theta[i]/lp.iL[i],3) - omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) ) - pp = (-im*omega,pp3...) - Mpp = m + 2* sum((sin.(pp./2)).^2) - Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4])) - - for i in 2:lp.iL[4] - res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4]) - - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))))) end - -end - + #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32) + + @timeit "fA analitical solution" begin + res_th = zeros(lp.iL[4]) + + pp3 = ntuple(i -> theta[i]/lp.iL[i],3) + omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) ) + pp = (-im*omega,pp3...) + Mpp = m + 2* sum((sin.(pp./2)).^2) + Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4])) + + for i in 2:lp.iL[4] + res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4]) + - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))))) + end + + end + return sum(abs.(res-res_th)) - + end diff --git a/test/dirac/test_solver_plw.jl b/test/dirac/test_solver_plw.jl index 9bd1ba8..a4de0c7 100644 --- a/test/dirac/test_solver_plw.jl +++ b/test/dirac/test_solver_plw.jl @@ -96,15 +96,15 @@ end begin -dif = 0.0 +diff = 0.0 for i in 1:3 for j in 1:4 - dif += Dwpw_test(c=i,s=j) + global diff += Dwpw_test(c=i,s=j) end end -if dif < 1.0e-15 - print("Dwpl test passed with average error ", dif/12,"!\n") +if diff < 1.0e-15 + print("Dwpl test passed with average error ", diff/12,"!\n") else - error("Dwpl test failed with difference: ",dif,"\n") + error("Dwpl test failed with difference: ",diff,"\n") end