SF fix for the propagators

2025-07-14 20:59:26 +02:00 · 2023-12-13 11:42:26 +01:00 · 2023-12-13 11:42:26 +01:00 · a7bc21769b
commit a7bc21769b
parent 6a90d74024
5 changed files with 101 additions and 91 deletions
--- a/docs/src/dirac.md
+++ b/docs/src/dirac.md
@ -63,6 +63,11 @@ in the first time-slice is zero. To enforce this, we have the function
 SF_bndfix!
 ```

+Note that this is not enforced in the Dirac operators, so if the field `so` does not satisfy SF
+boundary conditions, it will not (in general) satisfy them after applying [`Dw!`](@ref) 
+or [`g5Dw!`](@ref). This function is called for the function [`DwdagDw!`](@ref), so in this case
+`so` will always be a proper SF field after calling this function.
+
 The function [`Csw!`](@ref) is used to store the clover in `dws.csw`. It is computed 
 according to the expression

--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@ -28,7 +28,7 @@ The parameters are:
 - `m0::T` : Mass of the fermion
 - `csw::T` : Improvement coefficient for the Csw term
 - `th{Ntuple{4,Complex{T}}}` : Phase for the fermions included in the boundary conditions, reabsorbed in the Dirac operator.
- `tm` : Twisted mass paramete
+- `tm` : Twisted mass parameter
 - `ct` : Boundary improvement term, only used for Schrödinger Funtional boundary conditions. 
 """
 struct DiracParam{T,R}
@ -534,12 +534,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                end
            end
-
+            SF_bndfix!(dws.st,lp)
            @timeit "g5Dw" begin
                CUDA.@sync begin
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                end
            end
+            SF_bndfix!(so,lp)
        end
    else
        @timeit "DwdagDw" begin
@ -549,12 +550,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                end
            end
-
+            SF_bndfix!(dws.st,lp)
            @timeit "g5Dw" begin
                CUDA.@sync begin
                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                end
            end
+            SF_bndfix!(so,lp)
        end
    end

@ -604,10 +606,11 @@ end
 Sets all the values of `sp` in the  first time slice to zero.
 """
 function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
-    CUDA.@sync begin
-        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
+    @timeit "SF boundary fix" begin
+        CUDA.@sync begin
+            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
+        end
    end
-    
    return nothing
 end

--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@ -56,7 +56,7 @@ end

    function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
        
-Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
+Saves the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's' in 'pro'. The factor c_t is included while the factor 1/sqrt(V) is not.
 For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)

 """
@ -81,6 +81,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
        return nothing
    end

+    SF_bndfix!(pro,lp)
    fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
    
    CUDA.@sync begin
@ -94,7 +95,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
    g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
    
    CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
-    return pro
+    return nothing
 end

 """
@ -124,7 +125,8 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S

        return nothing
    end
-  
+
+    SF_bndfix!(pro,lp)
    fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
    
    CUDA.@sync begin
@ -138,7 +140,7 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
    g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
    
    CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
-    return pro
+    return nothing
 end


--- a/test/dirac/test_fp_fa.jl
+++ b/test/dirac/test_fp_fa.jl
@ -8,106 +8,106 @@ using TimerOutputs

 function fP_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)

-@timeit "fP inversion (x12)" begin
+    @timeit "fP inversion (x12)" begin

-lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
-exptheta = exp.(im.*theta./lp.iL);
+    lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
+    exptheta = exp.(im.*theta./lp.iL);

-dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
-dws = DiracWorkspace(SU3fund,Float64,lp);
+    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
+    dws = DiracWorkspace(SU3fund,Float64,lp);

-U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
-psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
+    U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
+    psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);

-res = zeros(lp.iL[4])
+    res = zeros(lp.iL[4])

-for s in 1:4 for c in 1:3
-    bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
+    for s in 1:4 for c in 1:3
+        bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);

-    for t in 1:lp.iL[4]
-    #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
-                i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
-                CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2)
-    #end end end
-    #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+        for t in 1:lp.iL[4]
+        #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
+                    i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
+                    CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2)
+        #end end end
+        #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+
+        end
+
+    end end

    end

-end end
+    @timeit "fP analitical solution" begin

-end
+        #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33)

-@timeit "fP analitical solution" begin
+        res_th = zeros(lp.iL[4])

-    #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33)
+        pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
+        omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
+        pp = (-im*omega,pp3...)
+        Mpp = m + 2* sum((sin.(pp./2)).^2)
+        Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))

-    res_th = zeros(lp.iL[4])
+        for i in 2:lp.iL[4]
+            res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) )
+        end

-    pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
-    omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
-    pp = (-im*omega,pp3...)
-    Mpp = m + 2* sum((sin.(pp./2)).^2)
-    Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
-
-    for i in 2:lp.iL[4]
-        res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) )
    end
-
-end
    return sum(abs.(res-res_th))

 end

 function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)

-@timeit "fA inversion (x12)" begin
+    @timeit "fA inversion (x12)" begin
+
+        lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
+        exptheta = exp.(im.*theta./lp.iL);
+
+        dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
+        dws = DiracWorkspace(SU3fund,Float64,lp);
+
+        U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
+        psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
+
+        res = im*zeros(lp.iL[4])
+
+        for s in 1:4 for c in 1:3
+            bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
+
+            for t in 1:lp.iL[4]
+            #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
+                        i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
+                        CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2)
+            #end end end
+            #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+
+            end
+
+        end end

-    lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
-    exptheta = exp.(im.*theta./lp.iL);
-    
-    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
-    dws = DiracWorkspace(SU3fund,Float64,lp);
-    
-    U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
-    psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
-    
-    res = im*zeros(lp.iL[4])
-    
-    for s in 1:4 for c in 1:3
-        bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
-    
-        for t in 1:lp.iL[4]
-        #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
-                    i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
-                    CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2)
-        #end end end
-        #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
-    
-        end
-    
-    end end
-    
-end
-    #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32)
-   
-@timeit "fA analitical solution" begin
-    res_th = zeros(lp.iL[4])
-    
-    pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
-    omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
-    pp = (-im*omega,pp3...)
-    Mpp = m + 2* sum((sin.(pp./2)).^2)
-    Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
-    
-    for i in 2:lp.iL[4]
-        res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4]) 
-        - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1)))))
    end
-    
-end
-    
+        #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32)
+
+    @timeit "fA analitical solution" begin
+        res_th = zeros(lp.iL[4])
+
+        pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
+        omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
+        pp = (-im*omega,pp3...)
+        Mpp = m + 2* sum((sin.(pp./2)).^2)
+        Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
+
+        for i in 2:lp.iL[4]
+            res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4])
+            - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1)))))
+        end
+
+    end
+
    return sum(abs.(res-res_th))
-    
+
 end


--- a/test/dirac/test_solver_plw.jl
+++ b/test/dirac/test_solver_plw.jl
@ -96,15 +96,15 @@ end


 begin
-dif = 0.0
+diff = 0.0
 for i in 1:3 for j in 1:4
-    dif += Dwpw_test(c=i,s=j)
+    global diff += Dwpw_test(c=i,s=j)
 end end

-if dif < 1.0e-15
-    print("Dwpl test passed with average error ", dif/12,"!\n")    
+if diff < 1.0e-15
+    print("Dwpl test passed with average error ", diff/12,"!\n")
 else
-    error("Dwpl test failed with difference: ",dif,"\n")
+    error("Dwpl test failed with difference: ",diff,"\n")
 end