mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
Some errors
This commit is contained in:
parent
182fa82d13
commit
abfd0bd72a
5 changed files with 135 additions and 109 deletions
|
@ -120,7 +120,7 @@ end
|
||||||
|
|
||||||
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
Randomizes the SU2fund / SU3fund fermion field. If the argument t is present, it only randomizes that time-slice.
|
||||||
"""
|
"""
|
||||||
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t::Int64 = 0) where {T}
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm{4,6,BC_PERIODIC,D}, t::Int64 = 0) where {T,D}
|
||||||
|
|
||||||
@timeit "Randomize pseudofermion field" begin
|
@timeit "Randomize pseudofermion field" begin
|
||||||
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
||||||
|
@ -132,6 +132,19 @@ function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::SpaceParm, t:
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
function pfrandomize!(f::AbstractArray{Spinor{4, SU3fund{T}}}, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}, t::Int64 = 0) where {T,D}
|
||||||
|
|
||||||
|
@timeit "Randomize pseudofermion field" begin
|
||||||
|
p = ntuple(i->CUDA.randn(T, lp.bsz, 3, lp.rsz,2),4) # complex generation not suported for Julia 1.5.4
|
||||||
|
CUDA.@sync begin
|
||||||
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_pf_su3!(f,p,lp,t)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
SF_bndfix!(f,lp)
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
function krnl_assign_pf_su3!(f::AbstractArray, p , lp::SpaceParm, t::Int64)
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
|
@ -30,7 +30,7 @@ function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::D
|
||||||
|
|
||||||
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
||||||
U .= expm.(U, ymws.mom, 2*eps)
|
U .= expm.(U, ymws.mom, 2*eps)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam,
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
|
|
||||||
for j in dsave:-1:1
|
for j in dsave:-1:1
|
||||||
@timeit "CPU to GPU" copyto!(U,U0)
|
@timeit "CPU to GPU" copyto!(U,U0)
|
||||||
for k in 1:j-1
|
for k in 1:j-1
|
||||||
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
||||||
end
|
end
|
||||||
|
@ -209,20 +209,20 @@ Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
|
||||||
@timeit "Laplacian" begin
|
@timeit "Laplacian" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
|
||||||
SF_bndfix!(si,lp)
|
SF_bndfix!(si,lp)
|
||||||
@timeit "Laplacian" begin
|
@timeit "Laplacian" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
SF_bndfix!(so,lp)
|
SF_bndfix!(so,lp)
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
@ -234,9 +234,9 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end])
|
if ((point_time((b,r),lp) != 1) && (point_time((b,r),lp) != lp.iL[end]))
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
so[b,r] = -4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
|
@ -247,10 +247,10 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -265,19 +265,19 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_PERIODIC,D}) where
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
so[b,r] = -4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -291,9 +291,9 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
|
||||||
|
|
||||||
if (point_time((b,r),lp) != 1)
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
so[b,r] = -4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
|
@ -302,10 +302,10 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -325,40 +325,40 @@ Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
|
||||||
"""
|
"""
|
||||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||||
|
|
||||||
@timeit "DwdagDw" begin
|
@timeit "DwdagDw" begin
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
@timeit "g5Dslsh" begin
|
@timeit "g5Dslsh" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw_improvement" begin
|
@timeit "Dw_improvement" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -382,12 +382,12 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -402,19 +402,19 @@ function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
||||||
|
|
||||||
so[b,r] = 4*si[b,r]
|
so[b,r] = 4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
bu3, ru3 = up((b,r), 3, lp)
|
bu3, ru3 = up((b,r), 3, lp)
|
||||||
bd3, rd3 = dw((b,r), 3, lp)
|
bd3, rd3 = dw((b,r), 3, lp)
|
||||||
bu4, ru4 = up((b,r), 4, lp)
|
bu4, ru4 = up((b,r), 4, lp)
|
||||||
bd4, rd4 = dw((b,r), 4, lp)
|
bd4, rd4 = dw((b,r), 4, lp)
|
||||||
|
|
||||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||||
|
|
||||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||||
end
|
end
|
||||||
|
@ -426,11 +426,11 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -442,15 +442,15 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORB
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x);
|
b = Int64(CUDA.threadIdx().x);
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
|
||||||
if (point_time((b,r),lp) != 1)
|
if (point_time((b,r),lp) != 1)
|
||||||
|
|
||||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -17,13 +17,13 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "Dw" begin
|
@timeit "Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
@timeit "Dw" begin
|
@timeit "Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -32,7 +32,7 @@ function Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -67,7 +67,7 @@ function krnl_Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPE
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -113,13 +113,13 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
|
||||||
if abs(dpar.csw) > 1.0E-10
|
if abs(dpar.csw) > 1.0E-10
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -128,7 +128,7 @@ function g5Dw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -166,7 +166,7 @@ function krnl_g5Dwimpr!(so, U, si, Fcsw, m0, tm, th, csw, lp::SpaceParm{4,6,BC_O
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_g5Dw!(so, U, si, m0, tm, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
function krnl_g5Dw!(so, U, si, m0, tm, th, ct, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
# The field si is assumed to be zero at t = 0,T
|
# The field si is assumed to be zero at t = 0,T
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -215,13 +215,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
|
||||||
|
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(dws.st, U, si, dws.csw, dpar.m0, dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(dws.st,lp)
|
SF_bndfix!(dws.st,lp)
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dwimpr!(so, U, dws.st, dws.csw, dpar.m0, -dpar.tm, dpar.th, dpar.csw, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(so,lp)
|
SF_bndfix!(so,lp)
|
||||||
|
@ -231,13 +231,13 @@ function DwdagDw!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpacePar
|
||||||
|
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(dws.st, U, si, dpar.m0, dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(dws.st,lp)
|
SF_bndfix!(dws.st,lp)
|
||||||
@timeit "g5Dw" begin
|
@timeit "g5Dw" begin
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dw!(so, U, dws.st, dpar.m0, -dpar.tm, dpar.th, dpar.ct, lp)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SF_bndfix!(so,lp)
|
SF_bndfix!(so,lp)
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
## OPEN DEBERIA ESTAR BIEN LAS ACCIONES, CHECKEAR. LAS FUERZAS FALTAN
|
## OPEN
|
||||||
##
|
##
|
||||||
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}
|
function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D}
|
||||||
|
|
||||||
|
@ -113,7 +113,7 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B
|
||||||
gt1 = U[bu1,id2,ru1]
|
gt1 = U[bu1,id2,ru1]
|
||||||
|
|
||||||
if ( (it == lp.iL[end]) || (it == 1)) && !TOBC
|
if ( (it == lp.iL[end]) || (it == 1)) && !TOBC
|
||||||
S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2]))
|
S += 0.5*cG*(tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])))
|
||||||
elseif (it == lp.iL[end]) && TOBC
|
elseif (it == lp.iL[end]) && TOBC
|
||||||
nothing
|
nothing
|
||||||
else
|
else
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
|
Given an algebra field with natural indexing, this routine sets the components to random Gaussian distributed values. If SF boundary conditions are used, the force at the boundaries is set to zero.
|
||||||
"""
|
"""
|
||||||
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
|
function randomize!(f, lp::SpaceParm, ymws::YMworkspace)
|
||||||
|
|
||||||
if ymws.ALG == SU2alg
|
if ymws.ALG == SU2alg
|
||||||
@timeit "Randomize SU(2) algebra field" begin
|
@timeit "Randomize SU(2) algebra field" begin
|
||||||
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
|
m = CUDA.randn(ymws.PRC, lp.bsz,lp.ndim,3,lp.rsz)
|
||||||
|
@ -54,31 +54,44 @@ function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_PERIODI
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D}
|
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D}
|
||||||
|
|
||||||
|
@inbounds begin
|
||||||
|
b = Int64(CUDA.threadIdx().x)
|
||||||
|
r = Int64(CUDA.blockIdx().x)
|
||||||
|
for id in 1:lp.ndim
|
||||||
|
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||||
|
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||||
|
m[b,id,7,r], m[b,id,8,r])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
|
||||||
|
function krnl_assign_SU3!(frc::AbstractArray{T}, m, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D}
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
b = Int64(CUDA.threadIdx().x)
|
b = Int64(CUDA.threadIdx().x)
|
||||||
r = Int64(CUDA.blockIdx().x)
|
r = Int64(CUDA.blockIdx().x)
|
||||||
it = point_time((b,r), lp)
|
it = point_time((b,r), lp)
|
||||||
|
|
||||||
if ((B==BC_SF_AFWB)||(B==BC_SF_ORBI))
|
if it == 1
|
||||||
if it == 1
|
for id in 1:lp.ndim-1
|
||||||
for id in 1:lp.ndim-1
|
frc[b,id,r] = zero(T)
|
||||||
frc[b,id,r] = zero(T)
|
end
|
||||||
end
|
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
||||||
frc[b,N,r] = SU3alg(m[b,N,1,r], m[b,N,2,r], m[b,N,3,r],
|
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
||||||
m[b,N,4,r], m[b,N,5,r], m[b,N,6,r],
|
m[b,N,7,r], m[b,N,8,r])
|
||||||
m[b,N,7,r], m[b,N,8,r])
|
else
|
||||||
else
|
for id in 1:lp.ndim
|
||||||
for id in 1:lp.ndim
|
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
||||||
frc[b,id,r] = SU3alg(m[b,id,1,r], m[b,id,2,r], m[b,id,3,r],
|
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
||||||
m[b,id,4,r], m[b,id,5,r], m[b,id,6,r],
|
m[b,id,7,r], m[b,id,8,r])
|
||||||
m[b,id,7,r], m[b,id,8,r])
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue