From 1b57d86c7969e813a1c81d5d53ccf9c116055466 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 21 Nov 2023 11:44:02 +0100
Subject: [PATCH 01/18] Addition of SU2fund

---
 src/Dirac/Dirac.jl           | 25 +++++++++---
 src/Groups/AlgebraU2.jl      | 20 ++++++++++
 src/Groups/FundamentalSU2.jl | 76 ++++++++++++++++++++++++++++++++++++
 src/Groups/Groups.jl         |  7 +++-
 src/Groups/SU2Types.jl       | 10 +++++
 src/LatticeGPU.jl            |  2 +-
 6 files changed, 131 insertions(+), 9 deletions(-)
 create mode 100644 src/Groups/AlgebraU2.jl
 create mode 100644 src/Groups/FundamentalSU2.jl

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 53e22d3..34e1950 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -54,14 +54,27 @@ struct DiracWorkspace{T}
     
     function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
 
-        sr  = scalar_field(Spinor{4,G}, lp)
-        sp  = scalar_field(Spinor{4,G}, lp)
-        sAp = scalar_field(Spinor{4,G}, lp)
-        st  = scalar_field(Spinor{4,G}, lp)
-
-        csw = tensor_field(U3alg{T},lp)
+        @timeit "Allocating DiracWorkspace" begin
+            if G == SU3fund
+                sr  = scalar_field(Spinor{4,G}, lp)
+                sp  = scalar_field(Spinor{4,G}, lp)
+                sAp = scalar_field(Spinor{4,G}, lp)
+                st  = scalar_field(Spinor{4,G}, lp)
+                csw = tensor_field(U3alg{T},lp)
+            end
+            
+            if G == SU2fund
+                sr  = scalar_field(Spinor{4,G}, lp)
+                sp  = scalar_field(Spinor{4,G}, lp)
+                sAp = scalar_field(Spinor{4,G}, lp)
+                st  = scalar_field(Spinor{4,G}, lp)
+                csw = tensor_field(U2alg{T},lp)
+            end
+        end
 
         return new{T}(sr,sp,sAp,st,csw,cs)
+        end
+
     end
 end
 
diff --git a/src/Groups/AlgebraU2.jl b/src/Groups/AlgebraU2.jl
new file mode 100644
index 0000000..4e88638
--- /dev/null
+++ b/src/Groups/AlgebraU2.jl
@@ -0,0 +1,20 @@
+
+
+
+struct U2alg{T} <: Algebra
+    u11::T
+    u22::T
+    u12::Complex{T}
+end
+
+function antsym(a::SU2{T}) where T <: AbstractFloat
+    return U2alg{T}(2.0*imag(a.t1),-2.0*imag(a.t1),2.0*a.t2)
+end
+
+Base.:*(a::U2alg{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(im*a.u11*b.t1 + a.u12*b.t2,
+                                                                        -conj(a.u12)*b.t1 + im*a.u22*b.t2)
+
+Base.:+(a::U2alg{T},b::U2alg{T}) where T <: AbstractFloat = U2alg{T}(a.u11 + b.u11, a.u22 + b.u22, a.u12 + b.u12)
+
+Base.:*(r::Number, a::U2alg{T}) where T <: AbstractFloat = U2alg{T}(r*a.u11, r*a.u22, r*a.u12)
+
diff --git a/src/Groups/FundamentalSU2.jl b/src/Groups/FundamentalSU2.jl
new file mode 100644
index 0000000..1ab1a2d
--- /dev/null
+++ b/src/Groups/FundamentalSU2.jl
@@ -0,0 +1,76 @@
+
+
+Base.zero(::Type{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(zero(T),zero(T))
+Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(complex(randn(rng,T),randn(rng,T)),
+                                                                                                      complex(randn(rng,T),randn(rng,T)))
+
+
+SU2fund(a::T, b::T)          where T <: AbstractFloat = SU2fund{T}(complex(a), complex(b))
+
+"""
+    dag(a::SU2fund{T})
+
+Returns the conjugate of a fundamental element. 
+"""
+dag(a::SU2fund{T})                 where T <: AbstractFloat = SU2fund{T}(conj(a.t1), conj(a.t2))
+
+"""
+    norm(a::SU2fund{T})
+
+Returns the norm of a fundamental element. Same result as dot(a,a).
+"""
+norm(a::SU2fund{T})                where T <: AbstractFloat = sqrt((abs2(a.t1) + abs2(a.t2)))
+
+"""
+    norm(a::SU2fund{T})
+
+Returns the norm of a fundamental element. Same result as sqrt(dot(a,a)).
+"""
+norm2(a::SU2fund{T})               where T <: AbstractFloat = (abs2(a.t1) + abs2(a.t2))
+
+"""
+    dot(a::SU2fund{T},b::SU2fund{T})
+
+Returns the scalar product of two fundamental elements. The convention is for the product to the linear in the second argument, and anti-linear in the first argument. 
+"""
+dot(g1::SU2fund{T},g2::SU2fund{T}) where T <: AbstractFloat = conj(g1.t1)*g2.t1+conj(g1.t2)*g2.t2
+
+"""
+    *(g::SU2{T},b::SU2fund{T})
+
+Returns ga
+"""
+Base.:*(g::SU2{T},b::SU2fund{T})     where T <: AbstractFloat = SU2fund{T}(g.t1*b.t1 + g.t2*b.t2,-conj(g.t2)*b.t1 + conj(g.t1)*b.t2)
+                                                                           
+"""
+    \\(g::SU2{T},b::SU2fund{T})
+
+Returns g^dag b
+"""
+Base.:\(g::SU2{T},b::SU2fund{T})     where T <: AbstractFloat = SU2fund{T}(conj(g.t1)*b.t1 - g.t2 * b.t2,conj(g.t2)*b.t1 + g.t1 * b.t2)
+
+"""
+    *(a::SU2alg{T},b::SU2fund{T})
+
+Returns a*b
+"""
+
+Base.:*(a::SU2alg{T},b::SU2fund{T})     where T <: AbstractFloat = SU2fund{T}(complex(0.0, a.t3)*b.t1/2 +  complex(a.t2,a.t1)*b.t2/2,
+                                                                              complex(-a.t2,a.t1)*b.t1/2 + complex(0.0, -a.t3)*b.t1/2)
+
+Base.:+(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1+b.t1,a.t2+b.t2)
+Base.:-(a::SU2fund{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.t1-b.t1,a.t2-b.t2)
+Base.:+(a::SU2fund{T})               where T <: AbstractFloat = SU2fund{T}(a.t1,a.t2)
+Base.:-(a::SU2fund{T})               where T <: AbstractFloat = SU2fund{T}(-a.t1,-a.t2)
+imm(a::SU2fund{T})                   where T <: AbstractFloat = SU2fund{T}(complex(-imag(a.t1),real(a.t1)),
+                                                                           complex(-imag(a.t2),real(a.t2)))
+mimm(a::SU2fund{T})                  where T <: AbstractFloat = SU2fund{T}(complex(imag(a.t1),-real(a.t1)),
+                                                                           complex(imag(a.t2),-real(a.t2)))
+
+# Operations with numbers
+Base.:*(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
+Base.:*(b::Number,a::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(b*a.t1,b*a.t2)
+Base.:/(a::SU2fund{T},b::Number) where T <: AbstractFloat = SU2fund{T}(a.t1/b,a.t2/b)
+
+Base.:*(a::M2x2{T},b::SU2fund{T}) where T <: AbstractFloat = SU2fund{T}(a.u11*b.t1 + a.u12*b.t2,
+                                                                        a.u21*b.t1 + a.u22*b.t2)
\ No newline at end of file
diff --git a/src/Groups/Groups.jl b/src/Groups/Groups.jl
index 3ed40ca..c6380b5 100644
--- a/src/Groups/Groups.jl
+++ b/src/Groups/Groups.jl
@@ -54,11 +54,12 @@ export Group, Algebra, GMatrix
 # SU(2) and 2x2 matrix operations
 ##
 include("SU2Types.jl")
-export SU2, SU2alg, M2x2
+export SU2, SU2alg, M2x2, SU2fund
 
 include("GroupSU2.jl")
 include("M2x2.jl")
 include("AlgebraSU2.jl")
+include("FundamentalSU2.jl")
 ## END SU(2)
 
 ##
@@ -74,8 +75,10 @@ include("FundamentalSU3.jl")
 export imm, mimm
 ## END SU(3)
 
+
+include("AlgebraU2.jl")
 include("AlgebraU3.jl")
-export U3alg
+export U2alg, U3alg
 
 include("GroupU1.jl")
 export U1, U1alg
diff --git a/src/Groups/SU2Types.jl b/src/Groups/SU2Types.jl
index 4a6a0f7..7525743 100644
--- a/src/Groups/SU2Types.jl
+++ b/src/Groups/SU2Types.jl
@@ -53,3 +53,13 @@ Base.convert(::Type{M2x2{T}}, a::SU2{T}) where T = M2x2{T}(a.t1,a.t2,-conj(a.t2)
 
 Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2alg{T}}) where T <: AbstractFloat = SU2alg{T}(randn(rng,T),randn(rng,T),randn(rng,T))
 Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2{T}})    where T <: AbstractFloat = exp(SU2alg{T}(randn(rng,T),randn(rng,T),randn(rng,T)))
+
+
+struct SU2fund{T}
+    t1::Complex{T}
+    t2::Complex{T}
+end
+
+Base.zero(::Type{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(zero(T),zero(T),zero(T))
+Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(complex(randn(rng,T),randn(rng,T)),
+                                                                                                      complex(randn(rng,T),randn(rng,T)))
\ No newline at end of file
diff --git a/src/LatticeGPU.jl b/src/LatticeGPU.jl
index bb640f2..6eccf5c 100644
--- a/src/LatticeGPU.jl
+++ b/src/LatticeGPU.jl
@@ -16,7 +16,7 @@ include("Groups/Groups.jl")
 
 using .Groups
 export Group, Algebra, GMatrix
-export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg
+export SU2, SU2alg, SU3, SU3alg, M3x3, M2x2, U1, U1alg, SU3fund, U3alg, SU2fund, U2alg
 export dot, expm, exp, dag, unitarize, inverse, tr, projalg, norm, norm2, isgroup, alg2mat, dev_one, antsym
 
 include("Space/Space.jl")

From a4edc31570abc2bb9139cf6508d30d9293adb725 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 21 Nov 2023 11:53:10 +0100
Subject: [PATCH 02/18] Compilation Fix

---
 src/Dirac/Dirac.jl           | 2 --
 src/Groups/FundamentalSU2.jl | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 34e1950..0d1f36b 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -54,7 +54,6 @@ struct DiracWorkspace{T}
     
     function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
 
-        @timeit "Allocating DiracWorkspace" begin
             if G == SU3fund
                 sr  = scalar_field(Spinor{4,G}, lp)
                 sp  = scalar_field(Spinor{4,G}, lp)
@@ -70,7 +69,6 @@ struct DiracWorkspace{T}
                 st  = scalar_field(Spinor{4,G}, lp)
                 csw = tensor_field(U2alg{T},lp)
             end
-        end
 
         return new{T}(sr,sp,sAp,st,csw,cs)
         end
diff --git a/src/Groups/FundamentalSU2.jl b/src/Groups/FundamentalSU2.jl
index 1ab1a2d..c44a4d5 100644
--- a/src/Groups/FundamentalSU2.jl
+++ b/src/Groups/FundamentalSU2.jl
@@ -1,9 +1,5 @@
 
 
-Base.zero(::Type{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(zero(T),zero(T))
-Random.rand(rng::AbstractRNG, ::Random.SamplerType{SU2fund{T}}) where T <: AbstractFloat = SU2fund{T}(complex(randn(rng,T),randn(rng,T)),
-                                                                                                      complex(randn(rng,T),randn(rng,T)))
-
 
 SU2fund(a::T, b::T)          where T <: AbstractFloat = SU2fund{T}(complex(a), complex(b))
 

From 80bae3d56785a46099655279c7ad0fe74d7f5cc0 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 21 Nov 2023 12:22:11 +0100
Subject: [PATCH 03/18] Removed extra "end"

---
 src/Dirac/Dirac.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 0d1f36b..832b3e9 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -54,6 +54,7 @@ struct DiracWorkspace{T}
     
     function DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D}) where {G,T <: AbstractFloat, B,D}
 
+        @timeit "Allocating DiracWorkspace" begin
             if G == SU3fund
                 sr  = scalar_field(Spinor{4,G}, lp)
                 sp  = scalar_field(Spinor{4,G}, lp)
@@ -69,11 +70,11 @@ struct DiracWorkspace{T}
                 st  = scalar_field(Spinor{4,G}, lp)
                 csw = tensor_field(U2alg{T},lp)
             end
+        end
 
         return new{T}(sr,sp,sAp,st,csw,cs)
         end
 
-    end
 end
 
 export DiracWorkspace, DiracParam

From 50f07f40e603202207fdf66947c59da61b6d45d1 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 21 Nov 2023 14:37:08 +0100
Subject: [PATCH 04/18] Constructor for dws fixed

---
 src/Dirac/Dirac.jl             | 2 +-
 test/dirac/test_fp_fa.jl       | 4 ++--
 test/dirac/test_solver_plw.jl  | 2 +-
 test/dirac/test_solver_rand.jl | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 832b3e9..8fd19af 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -72,7 +72,7 @@ struct DiracWorkspace{T}
             end
         end
 
-        return new{T}(sr,sp,sAp,st,csw,cs)
+        return new{T}(sr,sp,sAp,st,csw)
         end
 
 end
diff --git a/test/dirac/test_fp_fa.jl b/test/dirac/test_fp_fa.jl
index ee931ae..6610bb4 100644
--- a/test/dirac/test_fp_fa.jl
+++ b/test/dirac/test_fp_fa.jl
@@ -14,7 +14,7 @@ lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
 exptheta = exp.(im.*theta./lp.iL);
 
 dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0);
-dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
+dws = DiracWorkspace(SU3fund,Float64,lp);
 
 U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
 psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
@@ -66,7 +66,7 @@ function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1
     exptheta = exp.(im.*theta./lp.iL);
     
     dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0);
-    dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
+    dws = DiracWorkspace(SU3fund,Float64,lp);
     
     U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
     psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
diff --git a/test/dirac/test_solver_plw.jl b/test/dirac/test_solver_plw.jl
index 14100d0..195a1a3 100644
--- a/test/dirac/test_solver_plw.jl
+++ b/test/dirac/test_solver_plw.jl
@@ -8,7 +8,7 @@ function Dwpw_test(;p=0,s=1,c=1)
 lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0))
 gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
 dpar = DiracParam{Float64}(SU3fund,1.3,0.0,(1.0,1.0,1.0,1.0),0.0)
-dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
+dws = DiracWorkspace(SU3fund,Float64,lp);
 
 p==0 ? p = Int.(round.(lp.iL.*rand(4),RoundUp)) : nothing
 U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}))
diff --git a/test/dirac/test_solver_rand.jl b/test/dirac/test_solver_rand.jl
index 91a477d..c06de0a 100644
--- a/test/dirac/test_solver_rand.jl
+++ b/test/dirac/test_solver_rand.jl
@@ -10,7 +10,7 @@ using CUDA, LatticeGPU, TimerOutputs
     gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
     ymws = YMworkspace(SU3, Float64, lp)
     dpar = DiracParam{Float64}(SU3fund,2.3,0.0,(1.0,1.0,1.0,1.0),0.0)
-    dws = DiracWorkspace(SU3fund{Float64},Float64,lp);
+    dws = DiracWorkspace(SU3fund,Float64,lp);
 
     randomize!(ymws.mom, lp, ymws)
     U = exp.(ymws.mom)

From 0aae72cceababa2a27285d20b1d19d2fd7ca54ab Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 21 Nov 2023 14:45:06 +0100
Subject: [PATCH 05/18] Extension for the constructor

---
 src/Dirac/Dirac.jl | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 8fd19af..2d78098 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -56,19 +56,23 @@ struct DiracWorkspace{T}
 
         @timeit "Allocating DiracWorkspace" begin
             if G == SU3fund
-                sr  = scalar_field(Spinor{4,G}, lp)
-                sp  = scalar_field(Spinor{4,G}, lp)
-                sAp = scalar_field(Spinor{4,G}, lp)
-                st  = scalar_field(Spinor{4,G}, lp)
+                sr  = scalar_field(Spinor{4,SU3fund{T}}, lp)
+                sp  = scalar_field(Spinor{4,SU3fund{T}}, lp)
+                sAp = scalar_field(Spinor{4,SU3fund{T}}, lp)
+                st  = scalar_field(Spinor{4,SU3fund{T}}, lp)
                 csw = tensor_field(U3alg{T},lp)
-            end
-            
-            if G == SU2fund
+            elseif G == SU2fund
+                sr  = scalar_field(Spinor{4,SU2fund{T}}, lp)
+                sp  = scalar_field(Spinor{4,SU2fund{T}}, lp)
+                sAp = scalar_field(Spinor{4,SU2fund{T}}, lp)
+                st  = scalar_field(Spinor{4,SU2fund{T}}, lp)
+                csw = tensor_field(U2alg{T},lp)
+            else
                 sr  = scalar_field(Spinor{4,G}, lp)
                 sp  = scalar_field(Spinor{4,G}, lp)
                 sAp = scalar_field(Spinor{4,G}, lp)
                 st  = scalar_field(Spinor{4,G}, lp)
-                csw = tensor_field(U2alg{T},lp)
+                csw = nothing
             end
         end
 

From 107586289a28551df70500dae046029f46060d72 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 21 Nov 2023 16:39:25 +0100
Subject: [PATCH 06/18] read_gp function

---
 src/LatticeGPU.jl |  2 +-
 src/YM/YM.jl      |  2 +-
 src/YM/YMio.jl    | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/src/LatticeGPU.jl b/src/LatticeGPU.jl
index 6eccf5c..6c615b1 100644
--- a/src/LatticeGPU.jl
+++ b/src/LatticeGPU.jl
@@ -45,7 +45,7 @@ export Eoft_clover, Eoft_plaq, Qtop
 export FlowIntr, wfl_euler, zfl_euler, wfl_rk2, zfl_rk2, wfl_rk3, zfl_rk3
 export flw, flw_adapt
 export sfcoupling, bndfield, setbndfield
-export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg
+export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg, read_gp
 
 include("Spinors/Spinors.jl")
 
diff --git a/src/YM/YM.jl b/src/YM/YM.jl
index 490a434..f279b1a 100644
--- a/src/YM/YM.jl
+++ b/src/YM/YM.jl
@@ -147,6 +147,6 @@ include("YMsf.jl")
 export sfcoupling, bndfield, setbndfield
 
 include("YMio.jl")
-export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg
+export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg, read_gp
 
 end
diff --git a/src/YM/YMio.jl b/src/YM/YMio.jl
index 41e6132..f492497 100644
--- a/src/YM/YMio.jl
+++ b/src/YM/YMio.jl
@@ -297,3 +297,50 @@ function import_cern64(fname, ibc, lp::SpaceParm; log=true)
 
     return CuArray(Ucpu)
 end
+
+
+
+"""
+    read_gp(fname::String)
+
+Reads Gauge parameters from file `fname` using the native (BDIO) format. Returns GaugeParm and SpaceParm.
+"""
+function read_gp(fname::String)
+
+    UID_HDR = 14
+    fb = BDIO_open(fname, "r")
+    while BDIO_get_uinfo(fb) != UID_HDR
+        BDIO_seek!(fb)
+    end
+    ihdr = Vector{Int32}(undef, 2)
+    BDIO_read(fb, ihdr)
+    if (ihdr[1] != convert(Int32, 1653996111)) && (ihdr[2] != convert(Int32, 2))
+        error("Wrong file format [header]")
+    end
+
+    run = BDIO.BDIO_read_str(fb)
+
+    while BDIO_get_uinfo(fb) != 1
+        BDIO_seek!(fb)
+    end
+
+    ifoo = Vector{Int32}(undef, 4)
+    BDIO_read(fb, ifoo)
+    ndim = convert(Int64, ifoo[1])
+    npls = convert(Int64, round(ndim*(ndim-1)/2))
+    ibc  = convert(Int64, ifoo[2])
+    nf   = ifoo[4]
+    
+    ifoo = Vector{Int32}(undef, ndim+convert(Int32, npls))
+    BDIO_read(fb, ifoo)
+    iL  = ntuple(i -> convert(Int64, ifoo[i]),ndim)
+    ntw = ntuple(i -> convert(Int64, ifoo[i+ndim]), npls)
+
+    dfoo = Vector{Float64}(undef, 4)
+    BDIO_read(fb, dfoo)
+
+    lp = SpaceParm{ndim}(iL, (4,4,4,4), ibc, ntw)
+    gp = GaugeParm{Float64}(SU3{Float64}, dfoo[1], dfoo[2])
+    
+    return gp, lp
+end

From 06e08c50166053e2f0d1fe727b7f02a94cd9ee21 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Fri, 24 Nov 2023 09:21:03 +0100
Subject: [PATCH 07/18] Functions help

---
 src/Dirac/Dirac.jl      | 47 +++++++++++++++++++++++++++++++++++++++++
 src/Groups/AlgebraU2.jl | 11 ++++++++++
 src/Groups/AlgebraU3.jl |  9 ++++++++
 src/Solvers/CG.jl       | 11 +++++-----
 4 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 2d78098..4670fc2 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -19,7 +19,17 @@ using ..Fields
 using ..YM
 using ..Spinors
 
+"""
+    struct DiracParam{T,R}
 
+Stores the parameters of the Dirac operator. It can be generated via the constructor `function DiracParam{T}(::Type{R},m0,csw,th,ct)`. The first argument can be ommited and is taken to be `SU3fund`.
+The parameters are:
+
+- `m0::T` : Mass of the fermion
+- `csw::T` : Improvement coefficient for the Csw term
+- `th{Ntuple{4,Complex{T}}}` : Phase for the fermions included in the boundary conditions, reabsorbed in the Dirac operator.
+- `ct` : Boundary improvement term, only used for Schrödinger Funtional boundary conditions. 
+"""
 struct DiracParam{T,R}
     m0::T
     csw::T
@@ -45,6 +55,18 @@ function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R}
     return nothing
 end
 
+
+"""
+    struct DiracWorkspace{T}    
+
+Workspace needed to work with fermion fields. It contains four scalar fermion fields and, for the SU2fund and SU3fund, a U(N) field to store the clover term.
+
+It can be created with the constructor `DiracWorkspace(::Type{G}, ::Type{T}, lp::SpaceParm{4,6,B,D})`. For example:
+
+dws = DiracWorkspace(SU2fund,Float64,lp);
+dws = DiracWorkspace(SU3fund,Float64,lp);
+
+"""
 struct DiracWorkspace{T}
     sr
     sp
@@ -146,6 +168,14 @@ function krnl_csw!(csw::AbstractArray{T}, U, Ubnd, ipl, lp::SpaceParm{4,M,B,D})
     return nothing
 end
 
+
+"""
+    function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
+
+Computes the Dirac operator (with the Wilson term) `\`\ D_w \`\` with gauge field U and parameters `dpar` of the field `si` and stores it in `so`. 
+If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
+
+"""
 function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
        
     if abs(dpar.csw) > 1.0E-10
@@ -310,6 +340,12 @@ function krnl_Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},S
     return nothing
 end
 
+"""
+    function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})    
+
+Computes \`\` \\gamma_5 \`\` times the Dirac operator (with the Wilson term) with gauge field U and parameters `dpar` of the field `si` and stores it in `so`. 
+If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
+"""
 function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
        
     if abs(dpar.csw) > 1.0E-10
@@ -480,6 +516,13 @@ function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D}
     return nothing
 end
 
+"""
+    function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
+
+Applies the operator \`\` \\gamma_5 D_w \`\` twice to `si` and stores the result in `so`. This is equivalent to appling the operator \`\` \`\`
+The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
+"""
+
 function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
 
     if abs(dpar.csw) > 1.0E-10
@@ -554,7 +597,11 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
     return nothing
 end
 
+"""
+    SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
 
+Sets all the values of `sp` in the  first time slice to zero.
+"""
 function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
     CUDA.@sync begin
         CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
diff --git a/src/Groups/AlgebraU2.jl b/src/Groups/AlgebraU2.jl
index 4e88638..c64c8a3 100644
--- a/src/Groups/AlgebraU2.jl
+++ b/src/Groups/AlgebraU2.jl
@@ -1,12 +1,23 @@
 
 
 
+"""
+    struct U2alg{T} <: Algebra
+
+Elements of the `U(2)` Algebra. The type `T <: AbstractFloat` can be used to define single or double precision elements.
+"""
 struct U2alg{T} <: Algebra
     u11::T
     u22::T
     u12::Complex{T}
 end
 
+
+"""
+    antsym(a::SU2{T}) where T <: AbstractFloat
+
+Returns the antisymmetrization of the SU2 element `a`, that is `\`\ a - a^{\\dagger} `\`. This method returns al element of `U2alg{T}`.
+"""
 function antsym(a::SU2{T}) where T <: AbstractFloat
     return U2alg{T}(2.0*imag(a.t1),-2.0*imag(a.t1),2.0*a.t2)
 end
diff --git a/src/Groups/AlgebraU3.jl b/src/Groups/AlgebraU3.jl
index fa03579..4bc42ba 100644
--- a/src/Groups/AlgebraU3.jl
+++ b/src/Groups/AlgebraU3.jl
@@ -1,6 +1,10 @@
 
 
+"""
+    struct U3alg{T} <: Algebra
 
+Elements of the `U(3)` Algebra. The type `T <: AbstractFloat` can be used to define single or double precision elements.
+"""
 struct U3alg{T} <: Algebra
     u11::T
     u22::T
@@ -10,6 +14,11 @@ struct U3alg{T} <: Algebra
     u23::Complex{T}
 end
 
+"""
+    antsym(a::SU3{T}) where T <: AbstractFloat
+
+Returns the antisymmetrization of the SU3 element `a`, that is `\`\ a - a^{\\dagger} `\`. This method returns al element of `U3alg{T}`.
+"""
 function antsym(a::SU3{T}) where T <: AbstractFloat
     t1 = 2.0*imag(a.u11)
     t2 = 2.0*imag(a.u22)
diff --git a/src/Solvers/CG.jl b/src/Solvers/CG.jl
index 8e86c7b..5d0e3c3 100644
--- a/src/Solvers/CG.jl
+++ b/src/Solvers/CG.jl
@@ -9,11 +9,6 @@
 ### created: Tue Nov 30 11:10:57 2021
 ###                               
 
-"""
-    function CG!
-
-Solves the linear equation `Ax = si`
-"""
 function krnl_dot!(sum,fone,ftwo)
     b=Int64(CUDA.threadIdx().x)
     r=Int64(CUDA.blockIdx().x)
@@ -32,6 +27,12 @@ function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp) where {T}
     return sum(sumf)
 end
 
+
+"""
+    function CG!
+
+Solves the linear equation `Ax = si`
+"""
 function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0) where {T}
 
     dws.sr .= si

From d58f236d8c085abb962827c9dc739c91830f69d4 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 28 Nov 2023 15:30:00 +0100
Subject: [PATCH 08/18] Dirac documentation

---
 docs/make.jl      |   1 +
 docs/src/dirac.md | 103 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100644 docs/src/dirac.md

diff --git a/docs/make.jl b/docs/make.jl
index 60b7af4..d1856c2 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -10,5 +10,6 @@ makedocs(sitename="LatticeGPU", modules=[LatticeGPU], doctest=true,
              "Space-time" => "space.md",
              "Groups and algebras" => "groups.md",
              "Fields" => "fields.md"
+             "Dirac" => "dirac.md"
              ], 
          repo = "https://igit.ific.uv.es/alramos/latticegpu.jl")
diff --git a/docs/src/dirac.md b/docs/src/dirac.md
new file mode 100644
index 0000000..b911ab8
--- /dev/null
+++ b/docs/src/dirac.md
@@ -0,0 +1,103 @@
+
+# Dirac operator
+
+The module `Dirac` has the necessary stuctures and function 
+to simulate non-dynamical 4-dimensional Wilson fermions.
+
+There are two main data structures in this module, the structure [`DiracParam`](@ref)
+
+```@docs
+DiracParam
+```
+
+and the workspace [`DiracWorkspace`](@ref)
+
+```@docs
+DiracWorkspace
+```
+
+The workspace stores four fermion fields, namely `.sr`, `.sp`, `.sAp` and `.st`, used
+for different purposes. If the representation is either `SU2fund` of `SU3fund`, an extra
+field with values in `U2alg`/`U3alg` is created to store the clover, used for the improvent.
+
+## Functions
+
+The functions [`Dw!`](@ref), [`g5Dw!`](@ref) and [`DwdagDw!`](@ref) are all related to the 
+Wilson-Dirac operator.
+
+The action of the Dirac operator `Dw!` is the following:
+
+```math
+D\psi (\vec{x} = x_1,x_2,x_3,x_4) = (4 + m_0)psi(\vec{x})
+```
+```math
+    - \frac{1}{2}\sum_{\mu = 1}^4 \theta (\mu) (1-\gamma_\mu) U_\mu(\vec{x}) \psi(\vec{x} + \hat{\mu}) 
+```
+```math
+    + \theta^* (\mu) (1 + \gamma_\mu) U^{-1}_\mu(\vec{x} - \hat{\mu}) \psi(\vec{x} - \hat{\mu})
+```
+
+where $$m_0$$ and $$\theta$$ are respectively the values `.m0` and `.th` of [`DiracParam`](@ref).
+Note that $$|\theta(i)|=1$$ is not built into the code, so it should be imposed explicitly. 
+
+Additionally, if |`dpar.csw`| > 1.0E-10, the clover term is assumed to be stored in `ymws.csw`, which
+can be done via the [`Csw`](@ref) function. In this case we have an extra term in `Dw!`:
+
+```math
+    \frac{i}{2}C_{sw} \sum_{\pi = 1}^6 F^{cl}_\pi \sigma_\pi \psi(\vec{x})
+```
+where the $$\sigma$$ matrices are those described in the `Spinors` module and the index $$\pi$$ runs
+as specified in `lp.plidx`.
+
+If the boudary conditions, defined in `lp`, are either `BC_SF_ORBI,D` or `BC_SF_AFWB`, the 
+improvement term 
+
+```math
+    (c_t -1) (\delta_{x_4,a} \psi(\vec{x}) + \delta_{x_4,T-a} \psi(\vec{x}))
+```
+is added. Since the time-slice $$t=T$$ is not stored, this accounts to modifying the second
+and last time-slice. 
+
+Note that the Dirac operator for SF boundary conditions assumes that the value of the field 
+in the first time-slice is zero. To enforce this, we have the function
+
+```@docs
+SF_bndfix!
+```
+
+The function [`Csw`](@ref) is used to store the clover in `dws.csw`. It is computed 
+according to the expression
+
+```math
+F_{\mu,\nu} = \frac{1}{8} (Q_{\mu \nu} - Q_{\nu \mu})
+```
+
+where 
+```math
+Q_{\mu\nu} = U_\mu(\vec{x})U_{\nu}(x+\mu)U_{\mu}^{-1}(\vec{x}+\nu)U_{\nu}(\vec{x}) +
++ U_{\nu}^{-1}(\vec{x}-\nu) U_\mu (\vec{x}-\nu) U_{\nu}(\vec{x} +\mu - \nu) U^{-1}_{\mu}(\vec{x}) +
++ U^{-1}_{\mu}(x-\mu)U_\nu^{-1}(\vec{x} - \mu - \nu)U_\mu(\vec{x} - \mu - \nu)U_\nu^{-1}(x-\nu) +
++ U_{\nu}(\vec{x})U_{\mu}^{-1}(\vec{x} + \nu - \mu)U^{-1}_{\nu}(\vec{x} - \mu)U_\mu(\vec{x}-\mu)
+
+```
+
+The correspondence between the tensor field and the GPU-Array is the following:
+```math
+F[b,1,r] \to F_{41}(b,r) ,\quad F[b,2,r] \to F_{42}(b,r) ,\quad F[b,3,r] \to F_{43}(b,r) 
+```
+```math
+F[b,4,r] \to F_{31}(b,r) ,\quad F[b,5,r] \to F_{32}(b,r) ,\quad F[b,6,r] \to F_{21}(b,r) 
+```
+where $$(b,r)$$ labels the lattice points as explained in the module `Space`
+
+The function [`pfrandomize!`](@ref), userfull for stochastic sources, is also present. 
+
+The generic interface of these functions reads
+
+```@docs
+Dw!
+g5Dw!
+DwdagDw!
+Csw!
+pfrandomize!
+```

From d997df20d41c398f9a647137b939364bc1c7b50c Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Thu, 30 Nov 2023 11:37:26 +0100
Subject: [PATCH 09/18] Removed harmless bug in Propagators.jl

---
 src/Solvers/Propagators.jl | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl
index dc42f5b..541f0ed 100644
--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@@ -47,10 +47,6 @@ function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Space
     end
        
     g5Dw!(pro,U,dws.sp,dpar,dws,lp)
-
-    CUDA.@sync begin
-        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
-    end
       
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
     return nothing

From ed9d18b25dc21bd74d348414969531ec3487bd8a Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Thu, 30 Nov 2023 12:18:11 +0100
Subject: [PATCH 10/18] Solvers documentation

---
 docs/make.jl        |  1 +
 docs/src/solvers.md | 86 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 docs/src/solvers.md

diff --git a/docs/make.jl b/docs/make.jl
index d1856c2..f17a9fd 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -11,5 +11,6 @@ makedocs(sitename="LatticeGPU", modules=[LatticeGPU], doctest=true,
              "Groups and algebras" => "groups.md",
              "Fields" => "fields.md"
              "Dirac" => "dirac.md"
+             "Solvers" => "solvers.md"
              ], 
          repo = "https://igit.ific.uv.es/alramos/latticegpu.jl")
diff --git a/docs/src/solvers.md b/docs/src/solvers.md
new file mode 100644
index 0000000..b6fc606
--- /dev/null
+++ b/docs/src/solvers.md
@@ -0,0 +1,86 @@
+
+# Solvers
+
+The module `Solvers` contains the functions to invert the Dirac
+operator as well as functions to obtain specific propagators.
+
+
+## CG.jl
+
+The function [`CG!`](@ref) implements the Conjugate gradient 
+algorith for the operator A
+
+```@docs
+CG!
+```
+
+where the tolerance is normalized with respect to $$|$$``si``$$|^2$$.
+The operator A must have the same input structure as all the Dirac
+operators. If the maximum number of iterations `maxiter` is reached,
+the function will throw an error. The estimation for $$A^{-1}x$$ is
+stored in ``si``, and the number of iterations is returned.
+
+
+Note that all the fermion field in ``dws`` are used
+inside the function and will be modified. In particular, the final residue
+is given by $$|$$``dws.sr``$$|^2$$.
+
+
+
+## Propagators.jl
+
+In this file, we define a couple of useful functions to obtain certain
+propagators.
+
+```@docs
+propagator!
+```
+
+Internally, this function solves the equation 
+
+```math
+    D_w^\dagger D_w \psi = \gamma_5 D_w \gamma_5 \eta
+```
+
+where $$\eta$$ is either a point-source with the specified color and spin
+or a random source in a time-slice and stores the value in ``pro``. 
+To solve this equation, the [`CG!`](@ref) function is used.
+
+
+For the case of SF boundary conditions, we have the boundary-to-bulk 
+propagator, implemented by the function [`bndpropagator!`](@ref)
+
+```@docs
+bndpropagator!
+```
+
+This propagator is defined by the equation:
+
+```math
+D_W S(x) = \frac{c_t}{\sqrt{V}} \delta_{x_0,1} U_0^\dagger(0,\vec{x}) P_+
+```
+
+The analog for the other boundary is implemented in the function [`Tbndpropagator!`](@ref)
+
+```@docs
+Tbndpropagator!
+```
+
+defined by the equation:
+
+```math
+D_W R(x) = \frac{c_t}{\sqrt{V}} \delta_{x_0,T-1} U_0(T-1,\vec{x}) P_-
+```
+
+Where $$P_\pm = (1 \pm \gamma_0)/2$$. The boundary-to-boundary 
+propagator
+
+```math
+    - \frac{c_t}{\sqrt{V}} \sum_\vec{x} U_0 ^\dagger (T-1,\vec{x}) P_+ S(T-1,\vec{x})
+```
+
+is computed by the function [`bndtobnd`](@ref)
+
+```@docs
+bndtobnd
+```

From 30d3c2ae89c74ea7520329bb1e5e991cb48daa5a Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Thu, 30 Nov 2023 12:19:10 +0100
Subject: [PATCH 11/18] Small changes in documentation

---
 docs/src/dirac.md          | 5 +++--
 src/Solvers/Propagators.jl | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/src/dirac.md b/docs/src/dirac.md
index b911ab8..bcca0d3 100644
--- a/docs/src/dirac.md
+++ b/docs/src/dirac.md
@@ -41,10 +41,11 @@ where $$m_0$$ and $$\theta$$ are respectively the values `.m0` and `.th` of [`Di
 Note that $$|\theta(i)|=1$$ is not built into the code, so it should be imposed explicitly. 
 
 Additionally, if |`dpar.csw`| > 1.0E-10, the clover term is assumed to be stored in `ymws.csw`, which
-can be done via the [`Csw`](@ref) function. In this case we have an extra term in `Dw!`:
+can be done via the [`Csw`](@ref) function. In this case we have the Sheikholeslami–Wohlert (SW) term
+in `Dw!`:
 
 ```math
-    \frac{i}{2}C_{sw} \sum_{\pi = 1}^6 F^{cl}_\pi \sigma_\pi \psi(\vec{x})
+    \frac{i}{2}c_{sw} \sum_{\pi = 1}^6 F^{cl}_\pi \sigma_\pi \psi(\vec{x})
 ```
 where the $$\sigma$$ matrices are those described in the `Spinors` module and the index $$\pi$$ runs
 as specified in `lp.plidx`.
diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl
index 541f0ed..73ebcce 100644
--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@@ -99,7 +99,7 @@ end
 
 """
 
-    function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
+    function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
         
 Returns the propagator from the t=T boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
 For the propagator from t=0 to the bulk, use the function bndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)

From 9e30ec05006cfc59900833e100e60e60105486db Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Sat, 9 Dec 2023 12:49:32 +0100
Subject: [PATCH 12/18] Typo in documentation

---
 src/Groups/AlgebraU2.jl | 2 +-
 src/Groups/AlgebraU3.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Groups/AlgebraU2.jl b/src/Groups/AlgebraU2.jl
index c64c8a3..48a6ec9 100644
--- a/src/Groups/AlgebraU2.jl
+++ b/src/Groups/AlgebraU2.jl
@@ -16,7 +16,7 @@ end
 """
     antsym(a::SU2{T}) where T <: AbstractFloat
 
-Returns the antisymmetrization of the SU2 element `a`, that is `\`\ a - a^{\\dagger} `\`. This method returns al element of `U2alg{T}`.
+Returns the antisymmetrization of the SU2 element `a`, that is `\`\` `a - a^{\\dagger}` `\`. This method returns al element of `U2alg{T}`.
 """
 function antsym(a::SU2{T}) where T <: AbstractFloat
     return U2alg{T}(2.0*imag(a.t1),-2.0*imag(a.t1),2.0*a.t2)
diff --git a/src/Groups/AlgebraU3.jl b/src/Groups/AlgebraU3.jl
index 4bc42ba..ba0bbd4 100644
--- a/src/Groups/AlgebraU3.jl
+++ b/src/Groups/AlgebraU3.jl
@@ -17,7 +17,7 @@ end
 """
     antsym(a::SU3{T}) where T <: AbstractFloat
 
-Returns the antisymmetrization of the SU3 element `a`, that is `\`\ a - a^{\\dagger} `\`. This method returns al element of `U3alg{T}`.
+Returns the antisymmetrization of the SU3 element `a`, that is `\`\` `a - a^{\\dagger}` `\`. This method returns al element of `U3alg{T}`.
 """
 function antsym(a::SU3{T}) where T <: AbstractFloat
     t1 = 2.0*imag(a.u11)

From 12e648a0c5c446be4ee1cb371045652157b54070 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Sat, 9 Dec 2023 12:52:44 +0100
Subject: [PATCH 13/18] Typo in doc

---
 src/Dirac/Dirac.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 4670fc2..6ca05e9 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -172,7 +172,7 @@ end
 """
     function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
 
-Computes the Dirac operator (with the Wilson term) `\`\ D_w \`\` with gauge field U and parameters `dpar` of the field `si` and stores it in `so`. 
+Computes the Dirac operator (with the Wilson term) `\`\``D_w``\`\` with gauge field U and parameters `dpar` of the field `si` and stores it in `so`. 
 If `dpar.csw` is different from zero, the clover term should be stored in `dws.csw` via the Csw! function and is automatically included in the operator.
 
 """

From f7f28b91c84ff5c49548aa472c4bebe186d50c9c Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Sat, 9 Dec 2023 12:55:37 +0100
Subject: [PATCH 14/18] Removed unused type in internal function

---
 src/Solvers/CG.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Solvers/CG.jl b/src/Solvers/CG.jl
index 5d0e3c3..684090e 100644
--- a/src/Solvers/CG.jl
+++ b/src/Solvers/CG.jl
@@ -18,7 +18,7 @@ function krnl_dot!(sum,fone,ftwo)
 return nothing
 end
 
-function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp) where {T}
+function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp)
         
     CUDA.@sync begin
             CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(sumf,fone,ftwo)

From 5bd1aadfd088d908eeb20fe7059a9b8961cb7a5e Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Mon, 11 Dec 2023 11:46:05 +0100
Subject: [PATCH 15/18] Documentation for Spinors

---
 docs/make.jl           |  6 +--
 docs/src/dirac.md      | 28 +++++++-------
 docs/src/spinors.md    | 86 ++++++++++++++++++++++++++++++++++++++++++
 src/Dirac/Dirac.jl     |  1 -
 src/LatticeGPU.jl      |  2 +-
 src/Solvers/CG.jl      |  4 +-
 src/Spinors/Spinors.jl | 44 ++++++++++-----------
 7 files changed, 129 insertions(+), 42 deletions(-)
 create mode 100644 docs/src/spinors.md

diff --git a/docs/make.jl b/docs/make.jl
index f17a9fd..b06156d 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -9,8 +9,8 @@ makedocs(sitename="LatticeGPU", modules=[LatticeGPU], doctest=true,
              "LatticeGPU.jl" => "index.md", 
              "Space-time" => "space.md",
              "Groups and algebras" => "groups.md",
-             "Fields" => "fields.md"
-             "Dirac" => "dirac.md"
+             "Fields" => "fields.md",
+             "Dirac" => "dirac.md",
              "Solvers" => "solvers.md"
-             ], 
+             ],
          repo = "https://igit.ific.uv.es/alramos/latticegpu.jl")
diff --git a/docs/src/dirac.md b/docs/src/dirac.md
index bcca0d3..30c59a9 100644
--- a/docs/src/dirac.md
+++ b/docs/src/dirac.md
@@ -28,24 +28,21 @@ Wilson-Dirac operator.
 The action of the Dirac operator `Dw!` is the following:
 
 ```math
-D\psi (\vec{x} = x_1,x_2,x_3,x_4) = (4 + m_0)psi(\vec{x})
+D_w\psi (\vec{x} = x_1,x_2,x_3,x_4) = (4 + m_0)\psi(\vec{x}) - 
 ```
 ```math
-    - \frac{1}{2}\sum_{\mu = 1}^4 \theta (\mu) (1-\gamma_\mu) U_\mu(\vec{x}) \psi(\vec{x} + \hat{\mu}) 
-```
-```math
-    + \theta^* (\mu) (1 + \gamma_\mu) U^{-1}_\mu(\vec{x} - \hat{\mu}) \psi(\vec{x} - \hat{\mu})
+    - \frac{1}{2}\sum_{\mu = 1}^4 \theta (\mu) (1-\gamma_\mu) U_\mu(\vec{x}) \psi(\vec{x} + \hat{\mu}) + \theta^* (\mu) (1 + \gamma_\mu) U^{-1}_\mu(\vec{x} - \hat{\mu}) \psi(\vec{x} - \hat{\mu})
 ```
 
 where $$m_0$$ and $$\theta$$ are respectively the values `.m0` and `.th` of [`DiracParam`](@ref).
-Note that $$|\theta(i)|=1$$ is not built into the code, so it should be imposed explicitly. 
+Note that $$|\theta(\mu)|=1$$ is not built into the code, so it should be imposed explicitly. 
 
 Additionally, if |`dpar.csw`| > 1.0E-10, the clover term is assumed to be stored in `ymws.csw`, which
-can be done via the [`Csw`](@ref) function. In this case we have the Sheikholeslami–Wohlert (SW) term
+can be done via the [`Csw!`](@ref) function. In this case we have the Sheikholeslami–Wohlert (SW) term
 in `Dw!`:
 
 ```math
-    \frac{i}{2}c_{sw} \sum_{\pi = 1}^6 F^{cl}_\pi \sigma_\pi \psi(\vec{x})
+    \delta D_w^{sw} = \frac{i}{2}c_{sw} \sum_{\pi = 1}^6 F^{cl}_\pi \sigma_\pi \psi(\vec{x})
 ```
 where the $$\sigma$$ matrices are those described in the `Spinors` module and the index $$\pi$$ runs
 as specified in `lp.plidx`.
@@ -54,7 +51,7 @@ If the boudary conditions, defined in `lp`, are either `BC_SF_ORBI,D` or `BC_SF_
 improvement term 
 
 ```math
-    (c_t -1) (\delta_{x_4,a} \psi(\vec{x}) + \delta_{x_4,T-a} \psi(\vec{x}))
+    \delta D_w^{SF} = (c_t -1) (\delta_{x_4,a} \psi(\vec{x}) + \delta_{x_4,T-a} \psi(\vec{x}))
 ```
 is added. Since the time-slice $$t=T$$ is not stored, this accounts to modifying the second
 and last time-slice. 
@@ -66,7 +63,7 @@ in the first time-slice is zero. To enforce this, we have the function
 SF_bndfix!
 ```
 
-The function [`Csw`](@ref) is used to store the clover in `dws.csw`. It is computed 
+The function [`Csw!`](@ref) is used to store the clover in `dws.csw`. It is computed 
 according to the expression
 
 ```math
@@ -76,9 +73,13 @@ F_{\mu,\nu} = \frac{1}{8} (Q_{\mu \nu} - Q_{\nu \mu})
 where 
 ```math
 Q_{\mu\nu} = U_\mu(\vec{x})U_{\nu}(x+\mu)U_{\mu}^{-1}(\vec{x}+\nu)U_{\nu}(\vec{x}) +
-+ U_{\nu}^{-1}(\vec{x}-\nu) U_\mu (\vec{x}-\nu) U_{\nu}(\vec{x} +\mu - \nu) U^{-1}_{\mu}(\vec{x}) +
+U_{\nu}^{-1}(\vec{x}-\nu) U_\mu (\vec{x}-\nu) U_{\nu}(\vec{x} +\mu - \nu) U^{-1}_{\mu}(\vec{x}) +
+```
+```math
 + U^{-1}_{\mu}(x-\mu)U_\nu^{-1}(\vec{x} - \mu - \nu)U_\mu(\vec{x} - \mu - \nu)U_\nu^{-1}(x-\nu) +
-+ U_{\nu}(\vec{x})U_{\mu}^{-1}(\vec{x} + \nu - \mu)U^{-1}_{\nu}(\vec{x} - \mu)U_\mu(\vec{x}-\mu)
+```
+```math
++U_{\nu}(\vec{x})U_{\mu}^{-1}(\vec{x} + \nu - \mu)U^{-1}_{\nu}(\vec{x} - \mu)U_\mu(\vec{x}-\mu)
 
 ```
 
@@ -91,7 +92,8 @@ F[b,4,r] \to F_{31}(b,r) ,\quad F[b,5,r] \to F_{32}(b,r) ,\quad F[b,6,r] \to F_{
 ```
 where $$(b,r)$$ labels the lattice points as explained in the module `Space`
 
-The function [`pfrandomize!`](@ref), userfull for stochastic sources, is also present. 
+The function [`pfrandomize!`](@ref), userfull for stochastic sources, is also present. It
+randomizes a  fermion field either in all the space or in a specifit time-slice.
 
 The generic interface of these functions reads
 
diff --git a/docs/src/spinors.md b/docs/src/spinors.md
new file mode 100644
index 0000000..16675bc
--- /dev/null
+++ b/docs/src/spinors.md
@@ -0,0 +1,86 @@
+# Spinors
+
+The module Spinors defines the necessary functions for the structure `Spinor{NS,G}`,
+which is a NS-tuple with values in G.
+
+The functions `norm`, `norm2`, `dot`, `*`, `/`, `/`, `+`, `-`, `imm` and `mimm`, 
+if defined for G, are extended to Spinor{NS,G} for general NS.
+
+For the 4d case where NS = 4 there are some specific functions to implement different
+operations with the gamma matrices. The convention for these matrices is
+
+
+```math
+\gamma _4 = \left( 
+    \begin{array}{cccc}
+        0 & 0 & -1 & 0\\
+        0 & 0 & 0 & -1\\
+        -1 & 0 & 0 & 0\\
+        0 & -1 & 0 & 0\\
+    \end{array}
+    \right)
+    \quad 
+    \gamma_1 = \left( 
+    \begin{array}{cccc}
+        0 & 0 & 0 & -i\\
+        0 & 0 & -i & 0\\
+        0 & i & 0 & 0\\
+        i & 0 & 0 & 0\\
+    \end{array}
+    \right)
+```
+```math 
+    \gamma _2 = \left( 
+    \begin{array}{cccc}
+        0 & 0 & 0 & -1\\
+        0 & 0 & 1 & 0\\
+        0 & 1 & 0 & 0\\
+        -1 & 0 & 0 & 0\\
+    \end{array}
+    \right)
+    \quad 
+    \gamma_3 = \left( 
+    \begin{array}{cccc}
+        0 & 0 & -i & 0\\
+        0 & 0 & 0 & i\\
+        i & 0 & 0 & 0\\
+        0 & -i & 0 & 0\\
+    \end{array}
+    \right)
+```
+
+
+The function [`dmul`](@ref) implements the multiplication over the $$\gamma$$ matrices
+
+```@docs
+dmul
+```
+
+The function [`pmul`](@ref) implements the $$ (1 \pm \gamma_N) $$ proyectors. The functions 
+[`gpmul`](@ref) and [`gdagpmul`](@ref) do the same and then multiply each element by `g`and 
+g^-1 repectively.
+
+```@docs
+pmul
+gpmul
+gdagpmul
+```
+
+## Some examples
+
+Here we just display some examples for these functions. We display it with `ComplexF64`
+instead of `SU3fund` or `SU2fund` for simplicity.
+
+
+```@setup exs
+import Pkg # hide
+Pkg.activate("/home/alberto/code/julia/LatticeGPU/") # hide
+using LatticeGPU # hide
+```
+```@repl exs
+spin = Spinor{4,Complex{Float64}}((1.0,im*0.5,2.3,0.0))
+println(spin)
+println(dmul(Gamma{4},spin))
+println(pmul(Pgamma{2,-1},spin))
+
+```
diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 6ca05e9..5655287 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -522,7 +522,6 @@ end
 Applies the operator \`\` \\gamma_5 D_w \`\` twice to `si` and stores the result in `so`. This is equivalent to appling the operator \`\` \`\`
 The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
 """
-
 function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
 
     if abs(dpar.csw) > 1.0E-10
diff --git a/src/LatticeGPU.jl b/src/LatticeGPU.jl
index 6c615b1..c091ac3 100644
--- a/src/LatticeGPU.jl
+++ b/src/LatticeGPU.jl
@@ -50,7 +50,7 @@ export import_lex64, import_cern64, import_bsfqcd, save_cnfg, read_cnfg, read_gp
 include("Spinors/Spinors.jl")
 
 using .Spinors    
-export Spinor, Pgamma
+export Spinor, Pgamma, Gamma
 export imm, mimm
 export pmul, gpmul, gdagpmul, dmul
 
diff --git a/src/Solvers/CG.jl b/src/Solvers/CG.jl
index 684090e..ffec5d4 100644
--- a/src/Solvers/CG.jl
+++ b/src/Solvers/CG.jl
@@ -29,7 +29,7 @@ end
 
 
 """
-    function CG!
+    function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0)
 
 Solves the linear equation `Ax = si`
 """
@@ -75,4 +75,4 @@ function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T},
     end
     
     return niter
-end
\ No newline at end of file
+end
diff --git a/src/Spinors/Spinors.jl b/src/Spinors/Spinors.jl
index 6b8a46b..1cd29ea 100644
--- a/src/Spinors/Spinors.jl
+++ b/src/Spinors/Spinors.jl
@@ -169,7 +169,7 @@ end
 
 
 """
-    gpmul(pgamma{N,S}, g::G, a::Spinor) G <: Group
+    gpmul(Pgamma{N,S}, g::G, a::Spinor) G <: Group
 
 Returns ``g(1+s\\gamma_N)a``
 """
@@ -226,7 +226,7 @@ end
 end
 
 """
-    gdagpmul(pgamma{N,S}, g::G, a::Spinor) G <: Group
+    gdagpmul(Pgamma{N,S}, g::G, a::Spinor) G <: Group
 
 Returns ``g^+ (1+s\\gamma_N)a``
 """
@@ -284,33 +284,33 @@ end
 
 
 # dummy structs for dispatch:
-# Basis of \\Gamma_n
+# Basis of \\gamma_n
 struct Gamma{N}
 end
 
 """
-    dmul(n::Int64, a::Spinor)
+    dmul(Gamma{n}, a::Spinor)
 
-Returns ``\\Gamma_n a``
+Returns ``\\gamma_n a``
 
-indexing for Dirac basis ``\\Gamma_n``:
+indexing for Dirac basis ``\\gamma_n``:
 
- 1  gamma1
- 2  gamma2
- 3  gamma3
- 4  gamma0
- 5  gamma5
- 6  gamma1 gamma5
- 7  gamma2 gamma5
- 8  gamma3 gamma5
- 9  gamma0 gamma5
-10  sigma01
-11  sigma02
-12  sigma03
-13  sigma21
-14  sigma32
-15  sigma31
-16  identity
+ 1  gamma1;
+ 2  gamma2;
+ 3  gamma3;
+ 4  gamma0;
+ 5  gamma5;
+ 6  gamma1 gamma5;
+ 7  gamma2 gamma5;
+ 8  gamma3 gamma5;
+ 9  gamma0 gamma5;
+10  sigma01;
+11  sigma02;
+12  sigma03;
+13  sigma21;
+14  sigma32;
+15  sigma31;
+16  identity;
 
 """
 @inline dmul(::Type{Gamma{1}}, a::Spinor{NS,G}) where {NS,G} = Spinor{NS,G}((mimm(a.s[4]), mimm(a.s[3]), imm(a.s[2]), imm(a.s[1])))

From 6a90d74024ea16c09dd5486f90acb56c3558f565 Mon Sep 17 00:00:00 2001
From: "Fernando P. Panadero" <fernando.p@csic.es>
Date: Tue, 12 Dec 2023 10:42:27 +0100
Subject: [PATCH 16/18] Twisted mass

---
 src/Dirac/Dirac.jl             | 84 +++++++++++++++++-----------------
 test/dirac/test_fp_fa.jl       |  4 +-
 test/dirac/test_solver_plw.jl  |  2 +-
 test/dirac/test_solver_rand.jl |  2 +-
 4 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index 5655287..b57fa6c 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -28,21 +28,22 @@ The parameters are:
 - `m0::T` : Mass of the fermion
 - `csw::T` : Improvement coefficient for the Csw term
 - `th{Ntuple{4,Complex{T}}}` : Phase for the fermions included in the boundary conditions, reabsorbed in the Dirac operator.
+- `tm` : Twisted mass paramete
 - `ct` : Boundary improvement term, only used for Schrödinger Funtional boundary conditions. 
 """
 struct DiracParam{T,R}
     m0::T
     csw::T
     th::NTuple{4,Complex{T}}
+    tm::T
     ct::T
 
-
-    function DiracParam{T}(::Type{R},m0,csw,th,ct) where {T,R} 
-        return new{T,R}(m0,csw,th,ct)
+    function DiracParam{T}(::Type{R},m0,csw,th,tm,ct) where {T,R}
+        return new{T,R}(m0,csw,th,tm,ct)
     end
 
-    function DiracParam{T}(m0,csw,th,ct) where {T} 
-        return new{T,SU3fund}(m0,csw,th,ct)
+    function DiracParam{T}(m0,csw,th,tm,ct) where {T}
+        return new{T,SU3fund}(m0,csw,th,tm,ct)
     end
 end
 function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R}
@@ -50,8 +51,9 @@ function Base.show(io::IO, dpar::DiracParam{T,R}) where {T,R}
     println(io, "Wilson fermions in the: ", R, " representation")
     println(io, " - Bare mass: ", dpar.m0," // Kappa = ",0.5/(dpar.m0+4))
     println(io, " - Csw :                ", dpar.csw)
-    println(io, " - c_t:                 ", dpar.ct)
     println(io, " - Theta:               ", dpar.th)
+    println(io, " - Twisted mass:        ", dpar.tm)
+    println(io, " - c_t:                 ", dpar.ct)
     return nothing
 end
 
@@ -181,13 +183,13 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
     if abs(dpar.csw) > 1.0E-10
         @timeit "Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
             end
         end
     else
         @timeit "Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
             end
         end
     end
@@ -195,7 +197,7 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
     return nothing
 end
 
-function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
+function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
 
     b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
 
@@ -210,8 +212,8 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) wher
 
     @inbounds begin 
         
-        so[b,r] = (4+m0)*si[b,r]  + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-                                                +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))          
+        so[b,r] = (4+m0)*si[b,r]+ im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+                                                                             +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
 
         so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
@@ -223,7 +225,7 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) wher
     return nothing
 end
 
-function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
+function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
 
     b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
 
@@ -238,7 +240,7 @@ function krnl_Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
 
     @inbounds begin 
         
-        so[b,r] = (4+m0)*si[b,r]
+        so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
 
         so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                         th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
@@ -255,13 +257,13 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpacePa
     if abs(dpar.csw) > 1.0E-10
         @timeit "Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
             end
         end
     else
         @timeit "Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
             end
         end
     end
@@ -269,7 +271,7 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpacePa
     return nothing
 end
 
-function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
+function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
 
     # The field si is assumed to be zero at t = 0
 
@@ -288,8 +290,8 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,
 
         @inbounds begin 
             
-            so[b,r] = (4+m0)*si[b,r]  + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-                                                    +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))          
+            so[b,r] = (4+m0)*si[b,r]  + im*tm*dmul(Gamma{5},si[b,r]) + 0.5*csw*im*( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
+                                                                                   +Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) + Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) + Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))
 
             
             so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
@@ -306,7 +308,7 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,
     return nothing
 end
 
-function krnl_Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
+function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
 
     # The field si is assumed to be zero at t = 0
 
@@ -325,7 +327,7 @@ function krnl_Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},S
 
         @inbounds begin 
             
-            so[b,r] = (4+m0)*si[b,r]
+            so[b,r] = (4+m0)*si[b,r] + im*tm*dmul(Gamma{5},si[b,r])
             so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
                             th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
                             th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
@@ -351,13 +353,13 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
     if abs(dpar.csw) > 1.0E-10
         @timeit "g5Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
             end
         end
     else
         @timeit "g5Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
             end
         end
     end
@@ -365,7 +367,7 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
     return nothing
 end
 
-function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
+function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,B,D}) where {B,D}
 
     b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
 
@@ -388,13 +390,13 @@ function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, lp::SpaceParm{4,6,B,D}) wh
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
 
-        so[b,r] = dmul(Gamma{5}, so[b,r])
+        so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
     end
 
     return nothing
 end
 
-function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
+function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,B,D}) where {B,D}
 
     b = Int64(CUDA.threadIdx().x);  r = Int64(CUDA.blockIdx().x)
 
@@ -416,7 +418,7 @@ function krnl_g5Dw!(so, U, si, m0, th, lp::SpaceParm{4,6,B,D}) where {B,D}
                         th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
                         th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
 
-        so[b,r] = dmul(Gamma{5}, so[b,r])
+        so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
     end
 
     return nothing
@@ -427,13 +429,13 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Space
     if abs(dpar.csw) > 1.0E-10
         @timeit "g5Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
             end
         end
     else
         @timeit "g5Dw" begin
             CUDA.@sync begin
-               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.th, dpar.ct, lp)
+               CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
             end
         end
     end
@@ -441,7 +443,7 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Space
     return nothing
 end
 
-function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
+function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
 
     # The field si is assumed to be zero at t = 0
 
@@ -475,12 +477,12 @@ function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, th, csw, ct, lp::Union{SpaceParm{4,
         end
     end
 
-    so[b,r] = dmul(Gamma{5}, so[b,r])
+    so[b,r] = dmul(Gamma{5}, so[b,r])+ im*tm*si[b,r]
 
     return nothing
 end
 
-function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
+function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
 
     # The field si is assumed to be zero at t = 0
 
@@ -511,7 +513,7 @@ function krnl_g5Dw!(so, U, si, m0, th, ct, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D}
         end
     end
 
-    so[b,r] = dmul(Gamma{5}, so[b,r])
+    so[b,r] = dmul(Gamma{5}, so[b,r]) + im*tm*si[b,r]
 
     return nothing
 end
@@ -519,7 +521,7 @@ end
 """
     function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
 
-Applies the operator \`\` \\gamma_5 D_w \`\` twice to `si` and stores the result in `so`. This is equivalent to appling the operator \`\` \`\`
+Applies the operator \`\` \\gamma_5 D_w \`\` twice to `si` and stores the result in `so`. This is equivalent to appling the operator \`\` D_w^\\dagger D_w \`\`
 The Dirac operator is the same as in the functions `Dw!` and `g5Dw!`
 """
 function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
@@ -529,13 +531,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
             
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                 end
             end
 
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, dpar.ct, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                 end
             end
         end
@@ -544,13 +546,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
             
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, dpar.ct, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                 end
             end
 
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, dpar.ct, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                 end
             end
         end
@@ -566,13 +568,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
             
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
                 end
             end
 
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.th, dpar.csw, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
                 end
             end
         end
@@ -581,13 +583,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
             
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.th, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
                 end
             end
 
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.th, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, lp)
                 end
             end
         end
diff --git a/test/dirac/test_fp_fa.jl b/test/dirac/test_fp_fa.jl
index 6610bb4..ea49897 100644
--- a/test/dirac/test_fp_fa.jl
+++ b/test/dirac/test_fp_fa.jl
@@ -13,7 +13,7 @@ function fP_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1
 lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
 exptheta = exp.(im.*theta./lp.iL);
 
-dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0);
+dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
 dws = DiracWorkspace(SU3fund,Float64,lp);
 
 U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
@@ -65,7 +65,7 @@ function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1
     lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
     exptheta = exp.(im.*theta./lp.iL);
     
-    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,1.0);
+    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
     dws = DiracWorkspace(SU3fund,Float64,lp);
     
     U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
diff --git a/test/dirac/test_solver_plw.jl b/test/dirac/test_solver_plw.jl
index 195a1a3..9bd1ba8 100644
--- a/test/dirac/test_solver_plw.jl
+++ b/test/dirac/test_solver_plw.jl
@@ -7,7 +7,7 @@ using LatticeGPU, CUDA, TimerOutputs
 function Dwpw_test(;p=0,s=1,c=1)
 lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0))
 gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
-dpar = DiracParam{Float64}(SU3fund,1.3,0.0,(1.0,1.0,1.0,1.0),0.0)
+dpar = DiracParam{Float64}(SU3fund,1.3,0.0,(1.0,1.0,1.0,1.0),0.0,0.0)
 dws = DiracWorkspace(SU3fund,Float64,lp);
 
 p==0 ? p = Int.(round.(lp.iL.*rand(4),RoundUp)) : nothing
diff --git a/test/dirac/test_solver_rand.jl b/test/dirac/test_solver_rand.jl
index c06de0a..aaad348 100644
--- a/test/dirac/test_solver_rand.jl
+++ b/test/dirac/test_solver_rand.jl
@@ -9,7 +9,7 @@ using CUDA, LatticeGPU, TimerOutputs
     lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0))
     gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
     ymws = YMworkspace(SU3, Float64, lp)
-    dpar = DiracParam{Float64}(SU3fund,2.3,0.0,(1.0,1.0,1.0,1.0),0.0)
+    dpar = DiracParam{Float64}(SU3fund,2.3,0.0,(1.0,1.0,1.0,1.0),0.0,0.0)
     dws = DiracWorkspace(SU3fund,Float64,lp);
 
     randomize!(ymws.mom, lp, ymws)

From a7bc21769b9163bad1083718e173e5ce71da4072 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fernando=20P=C3=A9rez=20Panadero?= <fernando.p@csic.es>
Date: Wed, 13 Dec 2023 11:42:26 +0100
Subject: [PATCH 17/18] SF fix for the propagators

---
 docs/src/dirac.md             |   5 ++
 src/Dirac/Dirac.jl            |  15 ++--
 src/Solvers/Propagators.jl    |  10 ++-
 test/dirac/test_fp_fa.jl      | 152 +++++++++++++++++-----------------
 test/dirac/test_solver_plw.jl |  10 +--
 5 files changed, 101 insertions(+), 91 deletions(-)

diff --git a/docs/src/dirac.md b/docs/src/dirac.md
index 30c59a9..c664063 100644
--- a/docs/src/dirac.md
+++ b/docs/src/dirac.md
@@ -63,6 +63,11 @@ in the first time-slice is zero. To enforce this, we have the function
 SF_bndfix!
 ```
 
+Note that this is not enforced in the Dirac operators, so if the field `so` does not satisfy SF
+boundary conditions, it will not (in general) satisfy them after applying [`Dw!`](@ref) 
+or [`g5Dw!`](@ref). This function is called for the function [`DwdagDw!`](@ref), so in this case
+`so` will always be a proper SF field after calling this function.
+
 The function [`Csw!`](@ref) is used to store the clover in `dws.csw`. It is computed 
 according to the expression
 
diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index b57fa6c..a6ff103 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -28,7 +28,7 @@ The parameters are:
 - `m0::T` : Mass of the fermion
 - `csw::T` : Improvement coefficient for the Csw term
 - `th{Ntuple{4,Complex{T}}}` : Phase for the fermions included in the boundary conditions, reabsorbed in the Dirac operator.
-- `tm` : Twisted mass paramete
+- `tm` : Twisted mass parameter
 - `ct` : Boundary improvement term, only used for Schrödinger Funtional boundary conditions. 
 """
 struct DiracParam{T,R}
@@ -534,12 +534,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                 end
             end
-
+            SF_bndfix!(dws.st,lp)
             @timeit "g5Dw" begin
                 CUDA.@sync begin
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                 end
             end
+            SF_bndfix!(so,lp)
         end
     else
         @timeit "DwdagDw" begin
@@ -549,12 +550,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                 end
             end
-
+            SF_bndfix!(dws.st,lp)
             @timeit "g5Dw" begin
                 CUDA.@sync begin
                     CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
                 end
             end
+            SF_bndfix!(so,lp)
         end
     end
 
@@ -604,10 +606,11 @@ end
 Sets all the values of `sp` in the  first time slice to zero.
 """
 function SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
-    CUDA.@sync begin
-        CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
+    @timeit "SF boundary fix" begin
+        CUDA.@sync begin
+            CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_sfbndfix!(sp, lp)
+        end
     end
-    
     return nothing
 end
 
diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl
index 73ebcce..99cf462 100644
--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@@ -56,7 +56,7 @@ end
 
     function bndpropagator!(pro,U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
         
-Saves the propagator in from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's'. The factor c_t is included while the factor 1/sqrt(V) is not.
+Saves the propagator from the t=0 boundary to the bulk for the SF boundary conditions for a source with color 'c' and spin 's' in 'pro'. The factor c_t is included while the factor 1/sqrt(V) is not.
 For the propagator from T to the bulk, use the function Tbndpropagator(U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::SpaceParm{4,6,1,D}, maxiter::Int64, tol::Float64, c::Int64, s::Int64)
 
 """
@@ -81,6 +81,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
         return nothing
     end
 
+    SF_bndfix!(pro,lp)
     fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
     
     CUDA.@sync begin
@@ -94,7 +95,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
     g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
     
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
-    return pro
+    return nothing
 end
 
 """
@@ -124,7 +125,8 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
 
         return nothing
     end
-  
+
+    SF_bndfix!(pro,lp)
     fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
     
     CUDA.@sync begin
@@ -138,7 +140,7 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
     g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
     
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
-    return pro
+    return nothing
 end
 
 
diff --git a/test/dirac/test_fp_fa.jl b/test/dirac/test_fp_fa.jl
index ea49897..adaf3eb 100644
--- a/test/dirac/test_fp_fa.jl
+++ b/test/dirac/test_fp_fa.jl
@@ -8,106 +8,106 @@ using TimerOutputs
 
 function fP_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)
 
-@timeit "fP inversion (x12)" begin
+    @timeit "fP inversion (x12)" begin
 
-lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
-exptheta = exp.(im.*theta./lp.iL);
+    lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
+    exptheta = exp.(im.*theta./lp.iL);
 
-dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
-dws = DiracWorkspace(SU3fund,Float64,lp);
+    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
+    dws = DiracWorkspace(SU3fund,Float64,lp);
 
-U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
-psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
+    U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
+    psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
 
-res = zeros(lp.iL[4])
+    res = zeros(lp.iL[4])
 
-for s in 1:4 for c in 1:3
-    bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
+    for s in 1:4 for c in 1:3
+        bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
 
-    for t in 1:lp.iL[4]
-    #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
-                i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
-                CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2)
-    #end end end
-    #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+        for t in 1:lp.iL[4]
+        #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
+                    i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
+                    CUDA.@allowscalar (res[t] += norm2(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...])/2)
+        #end end end
+        #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+
+        end
+
+    end end
 
     end
 
-end end
+    @timeit "fP analitical solution" begin
 
-end
+        #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33)
 
-@timeit "fP analitical solution" begin
+        res_th = zeros(lp.iL[4])
 
-    #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.33)
+        pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
+        omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
+        pp = (-im*omega,pp3...)
+        Mpp = m + 2* sum((sin.(pp./2)).^2)
+        Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
 
-    res_th = zeros(lp.iL[4])
+        for i in 2:lp.iL[4]
+            res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) )
+        end
 
-    pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
-    omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
-    pp = (-im*omega,pp3...)
-    Mpp = m + 2* sum((sin.(pp./2)).^2)
-    Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
-
-    for i in 2:lp.iL[4]
-        res_th[i] = (2*3*sinh(omega)/(Rpp^2)) * ( (Mpp + sinh(omega))*exp(-2*omega*(i-1)) - (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1))) )
     end
-
-end
     return sum(abs.(res-res_th))
 
 end
 
 function fA_test(;theta = (0.5,0.7,1.0,0.0), m = 1.3, size = (8,8,8,16),prec = 1.0e-16)
 
-@timeit "fA inversion (x12)" begin
+    @timeit "fA inversion (x12)" begin
+
+        lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
+        exptheta = exp.(im.*theta./lp.iL);
+
+        dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
+        dws = DiracWorkspace(SU3fund,Float64,lp);
+
+        U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
+        psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
+
+        res = im*zeros(lp.iL[4])
+
+        for s in 1:4 for c in 1:3
+            bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
+
+            for t in 1:lp.iL[4]
+            #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
+                        i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
+                        CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2)
+            #end end end
+            #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
+
+            end
+
+        end end
 
-    lp = SpaceParm{4}(size,(4,4,4,4),1,(0,0,0,0,0,0));
-    exptheta = exp.(im.*theta./lp.iL);
-    
-    dpar = DiracParam{Float64}(SU3fund,m,0.0,exptheta,0.0,1.0);
-    dws = DiracWorkspace(SU3fund,Float64,lp);
-    
-    U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}));
-    psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
-    
-    res = im*zeros(lp.iL[4])
-    
-    for s in 1:4 for c in 1:3
-        bndpropagator!(psi,U,dpar,dws,lp,1000,prec,c,s);
-    
-        for t in 1:lp.iL[4]
-        #for i in 1:lp.iL[1]    for j in 1:lp.iL[2]        for k in 1:lp.iL[3]
-                    i=abs(rand(Int))%lp.iL[1] +1;j=abs(rand(Int))%lp.iL[2] +1;k=abs(rand(Int))%lp.iL[3] +1;
-                    CUDA.@allowscalar (res[t] += -dot(psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...],dmul(Gamma{4},psi[point_index(CartesianIndex{lp.ndim}((i,j,k,t)),lp)...]))/2)
-        #end end end
-        #res[t] = res[t]/(lp.iL[1]*lp.iL[2]*lp.iL[3])
-    
-        end
-    
-    end end
-    
-end
-    #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32)
-   
-@timeit "fA analitical solution" begin
-    res_th = zeros(lp.iL[4])
-    
-    pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
-    omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
-    pp = (-im*omega,pp3...)
-    Mpp = m + 2* sum((sin.(pp./2)).^2)
-    Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
-    
-    for i in 2:lp.iL[4]
-        res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4]) 
-        - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1)))))
     end
-    
-end
-    
+        #THEORETICAL SOLUTION: hep-lat/9606016 eq (2.32)
+
+    @timeit "fA analitical solution" begin
+        res_th = zeros(lp.iL[4])
+
+        pp3 = ntuple(i -> theta[i]/lp.iL[i],3)
+        omega = 2 * asinh(0.5* sqrt(( sum((sin.(pp3)).^2) + (m + 2*(sum((sin.(pp3./2)).^2) ))^2) / (1+m+2*(sum((sin.(pp3./2)).^2) )) ) )
+        pp = (-im*omega,pp3...)
+        Mpp = m + 2* sum((sin.(pp./2)).^2)
+        Rpp = Mpp*(1-exp(-2*omega*lp.iL[4])) + sinh(omega) * (1+exp(-2*omega*lp.iL[4]))
+
+        for i in 2:lp.iL[4]
+            res_th[i] = (6/(Rpp^2)) * ( 2*(Mpp - sinh(omega))*(Mpp + sinh(omega))*exp(-2*omega*lp.iL[4])
+            - Mpp*((Mpp + sinh(omega))*exp(-2*omega*(i-1)) + (Mpp - sinh(omega))*exp(-2*omega*(2*lp.iL[4]- (i - 1)))))
+        end
+
+    end
+
     return sum(abs.(res-res_th))
-    
+
 end
 
 
diff --git a/test/dirac/test_solver_plw.jl b/test/dirac/test_solver_plw.jl
index 9bd1ba8..a4de0c7 100644
--- a/test/dirac/test_solver_plw.jl
+++ b/test/dirac/test_solver_plw.jl
@@ -96,15 +96,15 @@ end
 
 
 begin
-dif = 0.0
+diff = 0.0
 for i in 1:3 for j in 1:4
-    dif += Dwpw_test(c=i,s=j)
+    global diff += Dwpw_test(c=i,s=j)
 end end
 
-if dif < 1.0e-15
-    print("Dwpl test passed with average error ", dif/12,"!\n")    
+if diff < 1.0e-15
+    print("Dwpl test passed with average error ", diff/12,"!\n")
 else
-    error("Dwpl test failed with difference: ",dif,"\n")
+    error("Dwpl test failed with difference: ",diff,"\n")
 end
 
 

From 5a163798726fcd8b9d8ce659aff530915ee172d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fernando=20P=C3=A9rez=20Panadero?= <fernando.p@csic.es>
Date: Thu, 14 Dec 2023 11:48:44 +0100
Subject: [PATCH 18/18] TM properly included in the propagators

---
 docs/src/dirac.md              |  3 ++-
 src/Dirac/Dirac.jl             | 22 ++++++++++++++++------
 src/LatticeGPU.jl              |  2 +-
 src/Solvers/Propagators.jl     |  8 ++++----
 test/dirac/test_solver_rand.jl |  3 ++-
 test/runtests.jl               |  5 ++++-
 6 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/docs/src/dirac.md b/docs/src/dirac.md
index c664063..75a330a 100644
--- a/docs/src/dirac.md
+++ b/docs/src/dirac.md
@@ -28,7 +28,7 @@ Wilson-Dirac operator.
 The action of the Dirac operator `Dw!` is the following:
 
 ```math
-D_w\psi (\vec{x} = x_1,x_2,x_3,x_4) = (4 + m_0)\psi(\vec{x}) - 
+D_w\psi (\vec{x} = x_1,x_2,x_3,x_4) = (4 + m_0 + i \mu \gamma_5)\psi(\vec{x}) - 
 ```
 ```math
     - \frac{1}{2}\sum_{\mu = 1}^4 \theta (\mu) (1-\gamma_\mu) U_\mu(\vec{x}) \psi(\vec{x} + \hat{\mu}) + \theta^* (\mu) (1 + \gamma_\mu) U^{-1}_\mu(\vec{x} - \hat{\mu}) \psi(\vec{x} - \hat{\mu})
@@ -108,4 +108,5 @@ g5Dw!
 DwdagDw!
 Csw!
 pfrandomize!
+mtwmdpar
 ```
diff --git a/src/Dirac/Dirac.jl b/src/Dirac/Dirac.jl
index a6ff103..74eb63c 100644
--- a/src/Dirac/Dirac.jl
+++ b/src/Dirac/Dirac.jl
@@ -22,7 +22,7 @@ using ..Spinors
 """
     struct DiracParam{T,R}
 
-Stores the parameters of the Dirac operator. It can be generated via the constructor `function DiracParam{T}(::Type{R},m0,csw,th,ct)`. The first argument can be ommited and is taken to be `SU3fund`.
+Stores the parameters of the Dirac operator. It can be generated via the constructor `function DiracParam{T}(::Type{R},m0,csw,th,tm,ct)`. The first argument can be ommited and is taken to be `SU3fund`.
 The parameters are:
 
 - `m0::T` : Mass of the fermion
@@ -537,7 +537,7 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
             SF_bndfix!(dws.st,lp)
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
                 end
             end
             SF_bndfix!(so,lp)
@@ -553,7 +553,7 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{Sp
             SF_bndfix!(dws.st,lp)
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
                 end
             end
             SF_bndfix!(so,lp)
@@ -576,7 +576,7 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
 
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
                 end
             end
         end
@@ -591,7 +591,7 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
 
             @timeit "g5Dw" begin
                 CUDA.@sync begin
-                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, dpar.tm, dpar.th, lp)
+                    CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
                 end
             end
         end
@@ -600,6 +600,16 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
     return nothing
 end
 
+"""
+    function mtwmdpar(dpar::DiracParam)
+
+Returns `dpar` with oposite value of the twisted mass.
+"""
+function mtwmdpar(dpar::DiracParam{P,R}) where {P,R}
+    return DiracParam{P}(R,dpar.m0,dpar.csw,dpar.th,-dpar.tm,dpar.ct)
+end
+
+
 """
     SF_bndfix!(sp, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}})
 
@@ -694,7 +704,7 @@ function krnl_assign_pf_su2!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
     return nothing
 end
 
-export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!
+export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
 
 
 include("DiracIO.jl")
diff --git a/src/LatticeGPU.jl b/src/LatticeGPU.jl
index c091ac3..acbcc82 100644
--- a/src/LatticeGPU.jl
+++ b/src/LatticeGPU.jl
@@ -57,7 +57,7 @@ export pmul, gpmul, gdagpmul, dmul
 include("Dirac/Dirac.jl")
 using .Dirac
 export DiracWorkspace, DiracParam
-export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!
+export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
 export read_prop, save_prop, read_dpar
 
 include("Solvers/Solvers.jl")
diff --git a/src/Solvers/Propagators.jl b/src/Solvers/Propagators.jl
index 99cf462..58008b2 100644
--- a/src/Solvers/Propagators.jl
+++ b/src/Solvers/Propagators.jl
@@ -25,7 +25,7 @@ function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Space
         CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
     end
        
-    g5Dw!(pro,U,dws.sp,dpar,dws,lp)
+    g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
       
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
     return nothing
@@ -46,7 +46,7 @@ function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Space
         CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
     end
        
-    g5Dw!(pro,U,dws.sp,dpar,dws,lp)
+    g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
       
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
     return nothing
@@ -92,7 +92,7 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
         CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
     end
        
-    g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
+    g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
     
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
     return nothing
@@ -137,7 +137,7 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
             CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
     end
        
-    g5Dw!(pro,U,dpar.ct*dws.sp,dpar,dws,lp)
+    g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
     
     CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
     return nothing
diff --git a/test/dirac/test_solver_rand.jl b/test/dirac/test_solver_rand.jl
index aaad348..0714d8f 100644
--- a/test/dirac/test_solver_rand.jl
+++ b/test/dirac/test_solver_rand.jl
@@ -32,7 +32,8 @@ end
 CUDA.@sync begin
         CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(rpsi)
 end
-g5Dw!(prop,U,rpsi,dpar,dws,lp)
+
+g5Dw!(prop,U,rpsi,mtwmdpar(dpar),dws,lp)
 CG!(prop,U,DwdagDw!,dpar,lp,dws,10000,1.0e-14)
 
 Dw!(dws.sp,U,prop,dpar,dws,lp)
diff --git a/test/runtests.jl b/test/runtests.jl
index fe59102..2f68f70 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,3 +1,6 @@
 
-#include("SAD/test_sad.jl")
+include("SAD/test_sad.jl")
 include("flow/test_adapt.jl")
+include("dirac/test_fp_fa.jl")
+include("dirac/test_solver_plw.jl")
+include("dirac/test_solver_rand.jl")