mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
Some errors
This commit is contained in:
parent
182fa82d13
commit
abfd0bd72a
5 changed files with 135 additions and 109 deletions
|
@ -120,7 +120,7 @@ end
|
||||||
|
|
||||||
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
||||||
"""
|
"""
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T}
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
|
||||||
|
|
||||||
@timeit "Randomize pseudofermion field" begin
|
@timeit "Randomize pseudofermion field" begin
|
||||||
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
||||||
|
@ -132,6 +132,19 @@ function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t:
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
|
||||||
|
|
||||||
|
@timeit "Randomize pseudofermion field" begin
|
||||||
|
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
SF_bndfix!(f,lp)
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
|
@ -30,7 +30,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
|
||||||
|
|
||||||
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
||||||
U .= expm.(U, ymws.mom, 2*eps)
|
U .= expm.(U, ymws.mom, 2*eps)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam,
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
|
|
||||||
for j in dsave:-1:1
|
for j in dsave:-1:1
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
for k in 1:j-1
|
for k in 1:j-1
|
||||||
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
||||||
end
|
end
|
||||||
|
@ -209,20 +209,20 @@ Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
|
||||||
@timeit "Laplacian" begin
|
@timeit "Laplacian" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
|
||||||
SF_bndfix!(si,lp)
|
SF_bndfix!(si,lp)
|
||||||
@timeit "Laplacian" begin
|
@timeit "Laplacian" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
SF_bndfix!(so,lp)
|
SF_bndfix!(so,lp)
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
@ -234,9 +234,9 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end])
|
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
so[b,r] = -4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
|
@ -247,10 +247,10 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -265,19 +265,19 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
so[b,r] = -4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -291,9 +291,9 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
|
||||||
|
|
||||||
if (point_time((b,r),lp) != 1)
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
so[b,r] = -4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
|
@ -302,10 +302,10 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -325,40 +325,40 @@ Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
|
||||||
"""
|
"""
|
||||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
@timeit "DwdagDw" begin
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -382,12 +382,12 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -402,19 +402,19 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
||||||
|
|
||||||
so[b,r] = 4*si[b,r]
|
so[b,r] = 4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
|
@ -426,11 +426,11 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -442,15 +442,15 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORB
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
if (point_time((b,r),lp) != 1)
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -17,13 +17,13 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw" begin
|
@timeit "Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
@timeit "Dw" begin
|
@timeit "Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -32,7 +32,7 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -67,7 +67,7 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPE
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -113,13 +113,13 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -128,7 +128,7 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -166,7 +166,7 @@ function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_O
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -215,13 +215,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
|
||||||
|
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(dws.st,lp)
|
SF_bndfix!(dws.st,lp)
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(so,lp)
|
SF_bndfix!(so,lp)
|
||||||
|
@ -231,13 +231,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
|
||||||
|
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(dws.st,lp)
|
SF_bndfix!(dws.st,lp)
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(so,lp)
|
SF_bndfix!(so,lp)
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
## OPEN DEBERIA ESTAR BIEN LAS ACCIONES, CHECKEAR. LAS FUERZAS FALTAN
|
## OPEN
|
||||||
##
|
##
|
||||||
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}
|
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}
|
||||||
|
|
||||||
|
@ -113,7 +113,7 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
|
||||||
gt1 = U[bu1,id2,ru1]
|
gt1 = U[bu1,id2,ru1]
|
||||||
|
|
||||||
if ( (it == lp.iL[end]) || (it == 1)) && !TOBC
|
if ( (it == lp.iL[end]) || (it == 1)) && !TOBC
|
||||||
S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2]))
|
S += 0.5*cG*(tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])))
|
||||||
elseif (it == lp.iL[end]) && TOBC
|
elseif (it == lp.iL[end]) && TOBC
|
||||||
nothing
|
nothing
|
||||||
else
|
else
|
||||||
|
|
|
@ -54,27 +54,40 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
|
||||||
|
|
||||||
|
@inbounds begin
|
||||||
|
b = Int64(CUDA.threadIdx().x)
|
||||||
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
for id in 1:lp.ndim
|
||||||
|
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||||
|
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||||
|
m[b,id,7,r], m[b,id,8,r])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
it = point_time((b,r), lp)
|
it = point_time((b,r), lp)
|
||||||
|
|
||||||
if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI))
|
if it == 1
|
||||||
if it == 1
|
for id in 1:lp.ndim-1
|
||||||
for id in 1:lp.ndim-1
|
frc[b,id,r] = zero(T)
|
||||||
frc[b,id,r] = zero(T)
|
end
|
||||||
end
|
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
||||||
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
||||||
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
m[b,N,7,r], m[b,N,8,r])
|
||||||
m[b,N,7,r], m[b,N,8,r])
|
else
|
||||||
else
|
for id in 1:lp.ndim
|
||||||
for id in 1:lp.ndim
|
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||||
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||||
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
m[b,id,7,r], m[b,id,8,r])
|
||||||
m[b,id,7,r], m[b,id,8,r])
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue