Some errors

This commit is contained in:
Fernando P.Panadero 2024-05-21 15:38:28 +02:00
parent 182fa82d13
commit abfd0bd72a
5 changed files with 135 additions and 109 deletions

View file

@ -120,7 +120,7 @@ end
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice. Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
""" """
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T} function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
@timeit "Randomize pseudofermion field" begin @timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4 p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
@ -132,6 +132,19 @@ function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t:
return nothing return nothing
end end
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
@timeit "Randomize pseudofermion field" begin
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
end
end
SF_bndfix!(f,lp)
return nothing
end
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64) function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
@inbounds begin @inbounds begin

View file

@ -30,7 +30,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1 ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
U .= expm.(U, ymws.mom, 2*eps) U .= expm.(U, ymws.mom, 2*eps)
end end
end end
end end
@ -86,7 +86,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam,
@timeit "CPU to GPU" copyto!(U,U0) @timeit "CPU to GPU" copyto!(U,U0)
for j in dsave:-1:1 for j in dsave:-1:1
@timeit "CPU to GPU" copyto!(U,U0) @timeit "CPU to GPU" copyto!(U,U0)
for k in 1:j-1 for k in 1:j-1
flw(U, int, 1, eps_all[k], gp, lp, ymws) flw(U, int, 1, eps_all[k], gp, lp, ymws)
end end
@ -209,20 +209,20 @@ Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
""" """
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D} function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
@timeit "Laplacian" begin @timeit "Laplacian" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
end
end end
end
return nothing return nothing
end end
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D} function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
SF_bndfix!(si,lp) SF_bndfix!(si,lp)
@timeit "Laplacian" begin @timeit "Laplacian" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
end
end end
end
SF_bndfix!(so,lp) SF_bndfix!(so,lp)
return nothing return nothing
end end
@ -234,9 +234,9 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
@inbounds begin @inbounds begin
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]) if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
so[b,r] = -4*si[b,r] so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp) bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp)
@ -247,10 +247,10 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
bu4, ru4 = up((b,r), 4, lp) bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end end
end end
@ -265,19 +265,19 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where
so[b,r] = -4*si[b,r] so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp) bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp) bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp) bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp) bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp) bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp) bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end end
return nothing return nothing
@ -291,9 +291,9 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
if (point_time((b,r),lp) != 1) if (point_time((b,r),lp) != 1)
so[b,r] = -4*si[b,r] so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp) bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp) bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp) bd2, rd2 = dw((b,r), 2, lp)
@ -302,10 +302,10 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
bu4, ru4 = up((b,r), 4, lp) bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp)
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) + so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) + th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) + th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) ) th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
end end
end end
@ -325,40 +325,40 @@ Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
""" """
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D} function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
@timeit "DwdagDw" begin @timeit "DwdagDw" begin
@timeit "g5Dslsh" begin @timeit "g5Dslsh" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
end end
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
end
end end
end end
end
@timeit "g5Dslsh" begin @timeit "g5Dslsh" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
end end
end end
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw_improvement" begin @timeit "Dw_improvement" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
end
end end
end end
end end
end
return nothing return nothing
end end
@ -382,12 +382,12 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
bu4, ru4 = up((b,r), 4, lp) bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp)
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r]) so[b,r] = dmul(Gamma{5}, so[b,r])
end end
end end
return nothing return nothing
@ -402,19 +402,19 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
so[b,r] = 4*si[b,r] so[b,r] = 4*si[b,r]
bu1, ru1 = up((b,r), 1, lp) bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp) bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp) bd2, rd2 = dw((b,r), 2, lp)
bu3, ru3 = up((b,r), 3, lp) bu3, ru3 = up((b,r), 3, lp)
bd3, rd3 = dw((b,r), 3, lp) bd3, rd3 = dw((b,r), 3, lp)
bu4, ru4 = up((b,r), 4, lp) bu4, ru4 = up((b,r), 4, lp)
bd4, rd4 = dw((b,r), 4, lp) bd4, rd4 = dw((b,r), 4, lp)
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) + so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) + th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) + th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) ) th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
so[b,r] = dmul(Gamma{5}, so[b,r]) so[b,r] = dmul(Gamma{5}, so[b,r])
end end
@ -426,11 +426,11 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
@inbounds begin @inbounds begin
b = Int64(CUDA.threadIdx().x); b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x) r = Int64(CUDA.blockIdx().x)
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end end
return nothing return nothing
@ -442,15 +442,15 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORB
@inbounds begin @inbounds begin
b = Int64(CUDA.threadIdx().x); b = Int64(CUDA.threadIdx().x);
r = Int64(CUDA.blockIdx().x) r = Int64(CUDA.blockIdx().x)
if (point_time((b,r),lp) != 1) if (point_time((b,r),lp) != 1)
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r]) so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r]))) -Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
end end
return nothing return nothing
end end
end end

View file

@ -17,13 +17,13 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "Dw" begin @timeit "Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end end
end end
else else
@timeit "Dw" begin @timeit "Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end end
end end
end end
@ -32,7 +32,7 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
return nothing return nothing
end end
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T # The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@ -67,7 +67,7 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPE
return nothing return nothing
end end
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T # The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@ -113,13 +113,13 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
if abs(dpar.csw) > 1.0E-10 if abs(dpar.csw) > 1.0E-10
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end end
end end
else else
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end end
end end
end end
@ -128,7 +128,7 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
return nothing return nothing
end end
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T # The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@ -166,7 +166,7 @@ function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_O
return nothing return nothing
end end
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
# The field si is assumed to be zero at t = 0,T # The field si is assumed to be zero at t = 0,T
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@ -215,13 +215,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end end
end end
SF_bndfix!(dws.st,lp) SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
end end
end end
SF_bndfix!(so,lp) SF_bndfix!(so,lp)
@ -231,13 +231,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
end end
end end
SF_bndfix!(dws.st,lp) SF_bndfix!(dws.st,lp)
@timeit "g5Dw" begin @timeit "g5Dw" begin
CUDA.@sync begin CUDA.@sync begin
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp) CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
end end
end end
SF_bndfix!(so,lp) SF_bndfix!(so,lp)

View file

@ -11,7 +11,7 @@
## ##
## OPEN DEBERIA ESTAR BIEN LAS ACCIONES, CHECKEAR. LAS FUERZAS FALTAN ## OPEN
## ##
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D} function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}
@ -113,7 +113,7 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
gt1 = U[bu1,id2,ru1] gt1 = U[bu1,id2,ru1]
if ( (it == lp.iL[end]) || (it == 1)) && !TOBC if ( (it == lp.iL[end]) || (it == 1)) && !TOBC
S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2])) S += 0.5*cG*(tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])))
elseif (it == lp.iL[end]) && TOBC elseif (it == lp.iL[end]) && TOBC
nothing nothing
else else

View file

@ -15,7 +15,7 @@
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero. Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
""" """
function randomize!(f, lp::SpaceParm, ymws::YMworkspace) function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
if ymws.ALG == SU2alg if ymws.ALG == SU2alg
@timeit "Randomize SU(2) algebra field" begin @timeit "Randomize SU(2) algebra field" begin
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz) m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
@ -54,31 +54,44 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
return nothing return nothing
end end
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D} function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
@inbounds begin
b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x)
for id in 1:lp.ndim
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
m[b,id,7,r], m[b,id,8,r])
end
end
return nothing
end
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
@inbounds begin @inbounds begin
b = Int64(CUDA.threadIdx().x) b = Int64(CUDA.threadIdx().x)
r = Int64(CUDA.blockIdx().x) r = Int64(CUDA.blockIdx().x)
it = point_time((b,r), lp) it = point_time((b,r), lp)
if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI)) if it == 1
if it == 1 for id in 1:lp.ndim-1
for id in 1:lp.ndim-1 frc[b,id,r] = zero(T)
frc[b,id,r] = zero(T) end
end frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r], m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r], m[b,N,7,r], m[b,N,8,r])
m[b,N,7,r], m[b,N,8,r]) else
else for id in 1:lp.ndim
for id in 1:lp.ndim frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r], m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r], m[b,id,7,r], m[b,id,8,r])
m[b,id,7,r], m[b,id,8,r])
end
end end
end end
end end
return nothing return nothing
end end