diff --git a/src/Dirac/Diracfields.jl b/src/Dirac/Diracfields.jl index f7e3edd..488e05a 100644 --- a/src/Dirac/Diracfields.jl +++ b/src/Dirac/Diracfields.jl @@ -120,7 +120,7 @@ end Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice. """ -function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T} +function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D} @timeit "Randomize pseudofermion field" begin p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4 @@ -132,6 +132,19 @@ function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t: return nothing end +function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D} + + @timeit "Randomize pseudofermion field" begin + p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4 + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t) + end + end + SF_bndfix!(f,lp) + + return nothing +end + function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64) @inbounds begin diff --git a/src/Dirac/Diracflow.jl b/src/Dirac/Diracflow.jl index 2bbac09..8065840 100644 --- a/src/Dirac/Diracflow.jl +++ b/src/Dirac/Diracflow.jl @@ -30,7 +30,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1 U .= expm.(U, ymws.mom, 2*eps) - end + end end end @@ -86,7 +86,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, @timeit "CPU to GPU" copyto!(U,U0) for j in dsave:-1:1 - @timeit "CPU to GPU" copyto!(U,U0) + @timeit "CPU to GPU" copyto!(U,U0) for k in 1:j-1 flw(U, int, 1, eps_all[k], gp, lp, ymws) end @@ -209,20 +209,20 @@ Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`. """ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D} - @timeit "Laplacian" begin - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp) - end + @timeit "Laplacian" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp) end + end return nothing end function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D} SF_bndfix!(si,lp) - @timeit "Laplacian" begin - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp) - end + @timeit "Laplacian" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp) end + end SF_bndfix!(so,lp) return nothing end @@ -234,9 +234,9 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} @inbounds begin - if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]) + if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end])) - so[b,r] = -4*si[b,r] + so[b,r] = -4*si[b,r] bu1, ru1 = up((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp) @@ -247,10 +247,10 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} bu4, ru4 = up((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp) - so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + - th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + - th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + - th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) + so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + + th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + + th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + + th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) end end @@ -265,19 +265,19 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where so[b,r] = -4*si[b,r] - bu1, ru1 = up((b,r), 1, lp) - bd1, rd1 = dw((b,r), 1, lp) - bu2, ru2 = up((b,r), 2, lp) - bd2, rd2 = dw((b,r), 2, lp) - bu3, ru3 = up((b,r), 3, lp) - bd3, rd3 = dw((b,r), 3, lp) - bu4, ru4 = up((b,r), 4, lp) - bd4, rd4 = dw((b,r), 4, lp) + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + - th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + - th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + - th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) + th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + + th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + + th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) end return nothing @@ -291,9 +291,9 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp if (point_time((b,r),lp) != 1) - so[b,r] = -4*si[b,r] + so[b,r] = -4*si[b,r] - bu1, ru1 = up((b,r), 1, lp) + bu1, ru1 = up((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp) bu2, ru2 = up((b,r), 2, lp) bd2, rd2 = dw((b,r), 2, lp) @@ -302,10 +302,10 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp bu4, ru4 = up((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp) - so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + - th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + - th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + - th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) + so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + + th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + + th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + + th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) end end @@ -325,40 +325,40 @@ Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`. """ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} - @timeit "DwdagDw" begin + @timeit "DwdagDw" begin - @timeit "g5Dslsh" begin + @timeit "g5Dslsh" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp) end - end + end - if abs(dpar.csw) > 1.0E-10 - @timeit "Dw_improvement" begin - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp) - end + if abs(dpar.csw) > 1.0E-10 + @timeit "Dw_improvement" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp) end end + end - @timeit "g5Dslsh" begin + @timeit "g5Dslsh" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp) end - end + end - if abs(dpar.csw) > 1.0E-10 - @timeit "Dw_improvement" begin - CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp) - end + if abs(dpar.csw) > 1.0E-10 + @timeit "Dw_improvement" begin + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp) end end - - end + + end + return nothing end @@ -382,12 +382,12 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac bu4, ru4 = up((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp) - so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + - th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + - th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + - th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) - so[b,r] = dmul(Gamma{5}, so[b,r]) + so[b,r] = dmul(Gamma{5}, so[b,r]) end end return nothing @@ -402,19 +402,19 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B} so[b,r] = 4*si[b,r] - bu1, ru1 = up((b,r), 1, lp) - bd1, rd1 = dw((b,r), 1, lp) - bu2, ru2 = up((b,r), 2, lp) - bd2, rd2 = dw((b,r), 2, lp) - bu3, ru3 = up((b,r), 3, lp) - bd3, rd3 = dw((b,r), 3, lp) - bu4, ru4 = up((b,r), 4, lp) - bd4, rd4 = dw((b,r), 4, lp) + bu1, ru1 = up((b,r), 1, lp) + bd1, rd1 = dw((b,r), 1, lp) + bu2, ru2 = up((b,r), 2, lp) + bd2, rd2 = dw((b,r), 2, lp) + bu3, ru3 = up((b,r), 3, lp) + bd3, rd3 = dw((b,r), 3, lp) + bu4, ru4 = up((b,r), 4, lp) + bd4, rd4 = dw((b,r), 4, lp) - so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + - th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + - th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + - th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + + th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) so[b,r] = dmul(Gamma{5}, so[b,r]) end @@ -426,11 +426,11 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B, @inbounds begin - b = Int64(CUDA.threadIdx().x); - r = Int64(CUDA.blockIdx().x) + b = Int64(CUDA.threadIdx().x); + r = Int64(CUDA.blockIdx().x) so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) - -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) + -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) end return nothing @@ -442,15 +442,15 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORB @inbounds begin - b = Int64(CUDA.threadIdx().x); - r = Int64(CUDA.blockIdx().x) + b = Int64(CUDA.threadIdx().x); + r = Int64(CUDA.blockIdx().x) - if (point_time((b,r),lp) != 1) + if (point_time((b,r),lp) != 1) - so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) - -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) - end + so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) + -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) + end - return nothing + return nothing end end diff --git a/src/Dirac/Diracoper.jl b/src/Dirac/Diracoper.jl index e6394d1..ba363e1 100644 --- a/src/Dirac/Diracoper.jl +++ b/src/Dirac/Diracoper.jl @@ -17,13 +17,13 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6 if abs(dpar.csw) > 1.0E-10 @timeit "Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp) end end else @timeit "Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp) end end end @@ -32,7 +32,7 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6 return nothing end -function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} +function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} # The field si is assumed to be zero at t = 0,T b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) @@ -67,7 +67,7 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPE return nothing end -function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} +function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} # The field si is assumed to be zero at t = 0,T b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) @@ -113,13 +113,13 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4 if abs(dpar.csw) > 1.0E-10 @timeit "g5Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp) end end else @timeit "g5Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp) end end end @@ -128,7 +128,7 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4 return nothing end -function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} +function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} # The field si is assumed to be zero at t = 0,T b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) @@ -166,7 +166,7 @@ function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_O return nothing end -function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} +function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} # The field si is assumed to be zero at t = 0,T b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) @@ -215,13 +215,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar @timeit "g5Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp) end end SF_bndfix!(dws.st,lp) @timeit "g5Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp) end end SF_bndfix!(so,lp) @@ -231,13 +231,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar @timeit "g5Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp) end end SF_bndfix!(dws.st,lp) @timeit "g5Dw" begin CUDA.@sync begin - CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp) + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp) end end SF_bndfix!(so,lp) diff --git a/src/YM/YMact.jl b/src/YM/YMact.jl index e0ec614..500ae0f 100644 --- a/src/YM/YMact.jl +++ b/src/YM/YMact.jl @@ -11,7 +11,7 @@ ## -## OPEN DEBERIA ESTAR BIEN LAS ACCIONES, CHECKEAR. LAS FUERZAS FALTAN +## OPEN ## function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D} @@ -113,7 +113,7 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B gt1 = U[bu1,id2,ru1] if ( (it == lp.iL[end]) || (it == 1)) && !TOBC - S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2])) + S += 0.5*cG*(tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2]))) elseif (it == lp.iL[end]) && TOBC nothing else diff --git a/src/YM/YMfields.jl b/src/YM/YMfields.jl index e22625a..95d6777 100644 --- a/src/YM/YMfields.jl +++ b/src/YM/YMfields.jl @@ -15,7 +15,7 @@ Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero. """ function randomize!(f, lp::SpaceParm, ymws::YMworkspace) - + if ymws.ALG == SU2alg @timeit "Randomize SU(2) algebra field" begin m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz) @@ -54,31 +54,44 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI return nothing end -function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D} +function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D} + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + for id in 1:lp.ndim + frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r], + m[b,id,4,r], m[b,id,5,r], m[b,id,6,r], + m[b,id,7,r], m[b,id,8,r]) + end + end + + return nothing +end + +function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D} @inbounds begin b = Int64(CUDA.threadIdx().x) r = Int64(CUDA.blockIdx().x) it = point_time((b,r), lp) - if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI)) - if it == 1 - for id in 1:lp.ndim-1 - frc[b,id,r] = zero(T) - end - frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r], - m[b,N,4,r], m[b,N,5,r], m[b,N,6,r], - m[b,N,7,r], m[b,N,8,r]) - else - for id in 1:lp.ndim - frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r], - m[b,id,4,r], m[b,id,5,r], m[b,id,6,r], - m[b,id,7,r], m[b,id,8,r]) - end + if it == 1 + for id in 1:lp.ndim-1 + frc[b,id,r] = zero(T) + end + frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r], + m[b,N,4,r], m[b,N,5,r], m[b,N,6,r], + m[b,N,7,r], m[b,N,8,r]) + else + for id in 1:lp.ndim + frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r], + m[b,id,4,r], m[b,id,5,r], m[b,id,6,r], + m[b,id,7,r], m[b,id,8,r]) end end end - + return nothing end