diff --git a/docs/src/dirac.md b/docs/src/dirac.md
index 30c59a9..c664063 100644
--- a/docs/src/dirac.md
+++ b/docs/src/dirac.md
@@ -63,6 +63,11 @@ in the first time-slice is zero. To enforce this, we have the function
 SF_bndfix!
 ```
 
+Note that this is not enforced in the Dirac operators, so if the field `so` does not satisfy SF
+boundary conditions, it will not (in general) satisfy them after applying [`Dw!`](@ref) 
+or [`g5Dw!`](@ref). This function is called for the function [`DwdagDw!`](@ref), so in this case
+`so` will always be a proper SF field after calling this function.
+
 The function [`Csw!`](@ref) is used to store the clover in `dws.csw`. It is computed 
 according to the expression
 
diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index b57fa6c..a6ff103 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -28,7 +28,7 @@ The parameters are:
 - `m0::T` : Mass of the fermion
 - `csw::T` : Improvement coefficient for the Csw term
 - `th{Ntuple{4,Complex{T}}}` : Phase for the fermions included in the boundary conditions, reabsorbed in the Dirac operator.
-- `tm` : Twisted mass paramete
+- `tm` : Twisted mass parameter
 - `ct` : Boundary improvement term, only used for Schrödinger Funtional boundary conditions. 
 """
 struct DiracParam{T,R}
@@ -534,12 +534,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                 end
             end
-
+            SF_bndfix!(dws.st,lp)
             @timeit "g5Dw" begin
                 CUDA.@sync begin
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                 end
             end
+            SF_bndfix!(so,lp)
         end
     else
         @timeit "DwdagDw" begin
@@ -549,12 +550,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                 end
             end
-
+            SF_bndfix!(dws.st,lp)
             @timeit "g5Dw" begin
                 CUDA.@sync begin
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                 end
             end
+            SF_bndfix!(so,lp)
         end
     end
 
@@ -604,10 +606,11 @@ end
 Sets all the values of `sp` in the  first time slice to zero.
 """
 function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
-    CUDA.@sync begin
-        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
+    @timeit "SF boundary fix" begin
+        CUDA.@sync begin
+            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
+        end
     end
-    
     return nothing
 end
 
diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl
index 73ebcce..99cf462 100644
--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@@ -56,7 +56,7 @@ end
 
     function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
         
-Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
+Saves the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's' in 'pro'. The factor c_t is included while the factor 1/sqrt(V) is not.
 For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
 
 """
@@ -81,6 +81,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
         return nothing
     end
 
+    SF_bndfix!(pro,lp)
     fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
     
     CUDA.@sync begin
@@ -94,7 +95,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
     g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
     
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
-    return pro
+    return nothing
 end
 
 """
@@ -124,7 +125,8 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
 
         return nothing
     end
-  
+
+    SF_bndfix!(pro,lp)
     fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
     
     CUDA.@sync begin
@@ -138,7 +140,7 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
     g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
     
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
-    return pro
+    return nothing
 end
 
 
diff --git a/test/dirac/test_fp_fa.jl b/test/dirac/test_fp_fa.jl
index ea49897..adaf3eb 100644
--- a/test/dirac/test_fp_fa.jl
+++ b/test/dirac/test_fp_fa.jl
@@ -8,106 +8,106 @@ using TimerOutputs
 
 function fP_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)
 
-@timeit "fP inversion (x12)" begin
+    @timeit "fP inversion (x12)" begin
 
-lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
-exptheta = exp.(im.*theta./lp.iL);
+    lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
+    exptheta = exp.(im.*theta./lp.iL);
 
-dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
-dws = DiracWorkspace(SU3fund,Float64,lp);
+    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
+    dws = DiracWorkspace(SU3fund,Float64,lp);
 
-U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
-psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
+    U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
+    psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
 
-res = zeros(lp.iL[4])
+    res = zeros(lp.iL[4])
 
-for s in 1:4 for c in 1:3
-    bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
+    for s in 1:4 for c in 1:3
+        bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
 
-    for t in 1:lp.iL[4]
-    #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
-                i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
-                CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2)
-    #end end end
-    #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+        for t in 1:lp.iL[4]
+        #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
+                    i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
+                    CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2)
+        #end end end
+        #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+
+        end
+
+    end end
 
     end
 
-end end
+    @timeit "fP analitical solution" begin
 
-end
+        #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33)
 
-@timeit "fP analitical solution" begin
+        res_th = zeros(lp.iL[4])
 
-    #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33)
+        pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
+        omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
+        pp = (-im*omega,pp3...)
+        Mpp = m + 2* sum((sin.(pp./2)).^2)
+        Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
 
-    res_th = zeros(lp.iL[4])
+        for i in 2:lp.iL[4]
+            res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) )
+        end
 
-    pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
-    omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
-    pp = (-im*omega,pp3...)
-    Mpp = m + 2* sum((sin.(pp./2)).^2)
-    Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
-
-    for i in 2:lp.iL[4]
-        res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) )
     end
-
-end
     return sum(abs.(res-res_th))
 
 end
 
 function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)
 
-@timeit "fA inversion (x12)" begin
+    @timeit "fA inversion (x12)" begin
+
+        lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
+        exptheta = exp.(im.*theta./lp.iL);
+
+        dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
+        dws = DiracWorkspace(SU3fund,Float64,lp);
+
+        U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
+        psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
+
+        res = im*zeros(lp.iL[4])
+
+        for s in 1:4 for c in 1:3
+            bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
+
+            for t in 1:lp.iL[4]
+            #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
+                        i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
+                        CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2)
+            #end end end
+            #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+
+            end
+
+        end end
 
-    lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
-    exptheta = exp.(im.*theta./lp.iL);
-    
-    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
-    dws = DiracWorkspace(SU3fund,Float64,lp);
-    
-    U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
-    psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
-    
-    res = im*zeros(lp.iL[4])
-    
-    for s in 1:4 for c in 1:3
-        bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
-    
-        for t in 1:lp.iL[4]
-        #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
-                    i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
-                    CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2)
-        #end end end
-        #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
-    
-        end
-    
-    end end
-    
-end
-    #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32)
-   
-@timeit "fA analitical solution" begin
-    res_th = zeros(lp.iL[4])
-    
-    pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
-    omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
-    pp = (-im*omega,pp3...)
-    Mpp = m + 2* sum((sin.(pp./2)).^2)
-    Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
-    
-    for i in 2:lp.iL[4]
-        res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4]) 
-        - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1)))))
     end
-    
-end
-    
+        #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32)
+
+    @timeit "fA analitical solution" begin
+        res_th = zeros(lp.iL[4])
+
+        pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
+        omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
+        pp = (-im*omega,pp3...)
+        Mpp = m + 2* sum((sin.(pp./2)).^2)
+        Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
+
+        for i in 2:lp.iL[4]
+            res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4])
+            - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1)))))
+        end
+
+    end
+
     return sum(abs.(res-res_th))
-    
+
 end
 
 
diff --git a/test/dirac/test_solver_plw.jl b/test/dirac/test_solver_plw.jl
index 9bd1ba8..a4de0c7 100644
--- a/test/dirac/test_solver_plw.jl
+++ b/test/dirac/test_solver_plw.jl
@@ -96,15 +96,15 @@ end
 
 
 begin
-dif = 0.0
+diff = 0.0
 for i in 1:3 for j in 1:4
-    dif += Dwpw_test(c=i,s=j)
+    global diff += Dwpw_test(c=i,s=j)
 end end
 
-if dif < 1.0e-15
-    print("Dwpl test passed with average error ", dif/12,"!\n")    
+if diff < 1.0e-15
+    print("Dwpl test passed with average error ", diff/12,"!\n")
 else
-    error("Dwpl test failed with difference: ",dif,"\n")
+    error("Dwpl test failed with difference: ",diff,"\n")
 end