mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 11:13:42 +02:00
Merged Fermion flow
This commit is contained in:
parent
b92f9c92e0
commit
55312a8f71
8 changed files with 776 additions and 50 deletions
|
@ -710,5 +710,8 @@ export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
|
|||
include("DiracIO.jl")
|
||||
export read_prop, save_prop, read_dpar
|
||||
|
||||
include("Diracflow.jl")
|
||||
export Dslash_sq!, flw, backflow
|
||||
|
||||
|
||||
end
|
||||
|
|
423
src/Dirac/Diracflow.jl
Normal file
423
src/Dirac/Diracflow.jl
Normal file
|
@ -0,0 +1,423 @@
|
|||
|
||||
import ..YM.flw, ..YM.force_gauge, ..YM.flw_adapt
|
||||
|
||||
|
||||
function flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
|
||||
@timeit "Integrating flow equations" begin
|
||||
for i in 1:ns
|
||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||
|
||||
if int.add_zth
|
||||
add_zth_term(ymws::YMworkspace, U, lp)
|
||||
end
|
||||
|
||||
Nablanabla!(dws.sAp, U, psi, dpar, dws, lp)
|
||||
psi .= psi + 2*int.r*eps*dws.sAp
|
||||
|
||||
ymws.mom .= ymws.frc1
|
||||
U .= expm.(U, ymws.mom, 2*eps*int.r)
|
||||
|
||||
for k in 1:NI
|
||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||
|
||||
if int.add_zth
|
||||
add_zth_term(ymws::YMworkspace, U, lp)
|
||||
end
|
||||
|
||||
Nablanabla!(dws.sp, U, psi, dpar, dws, lp)
|
||||
dws.sAp .= int.e0[k].*dws.sAp .+ int.e1[k].*dws.sp
|
||||
psi .= psi + 2*eps*dws.sAp
|
||||
|
||||
ymws.mom .= int.e0[k].*ymws.mom .+ int.e1[k].*ymws.frc1
|
||||
U .= expm.(U, ymws.mom, 2*eps)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw(U, psi, int::FlowIntr{NI,T}, ns::Int64, int.eps, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
"""
|
||||
function backflow(psi, U, Dt, nsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
Performs one step back in flow time for the fermion field, according to 1302.5246. The fermion field must me that of the time-slice Dt and is flowed back to the first time-slice
|
||||
nsave is the total number of gauge fields saved in the process
|
||||
|
||||
"""
|
||||
function backflow(psi, U, Dt, maxnsave::Int64, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
int = wfl_rk3(Float64,0.01,1.0) # Default integrator, it has to be order 3 rk but in can be zfl
|
||||
|
||||
@timeit "Backflow integration" begin
|
||||
@timeit "GPU to CPU" U0 = Array(U)
|
||||
|
||||
nt,eps_all = flw_adapt(U, int, Dt, gp, lp, ymws)
|
||||
|
||||
nsave = min(maxnsave,nt)
|
||||
|
||||
nsave != 0 ? dsave = Int64(floor(nt/nsave)) : dsave = nt
|
||||
Usave = Vector{typeof(U0)}(undef,nsave)
|
||||
|
||||
@timeit "CPU to GPU" copyto!(U,U0)
|
||||
for i in 1:(dsave*nsave)
|
||||
flw(U, int, 1, eps_all[i], gp, lp, ymws)
|
||||
(i%dsave)==0 ? Usave[Int64(i/dsave)] = Array(U) : nothing
|
||||
end
|
||||
|
||||
for j in (nt%nsave):-1:1
|
||||
@timeit "CPU to GPU" copyto!(U,Usave[end])
|
||||
for k in 1:j-1
|
||||
flw(U, int, 1, eps_all[nsave*dsave + k], gp, lp, ymws)
|
||||
end
|
||||
bflw_step!(psi, U, eps_all[nsave*dsave + j], int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
end
|
||||
|
||||
for i in (nsave-1):-1:1
|
||||
for j in dsave:-1:1
|
||||
@timeit "CPU to GPU" copyto!(U,Usave[i])
|
||||
for k in 1:j-1
|
||||
flw(U, int, 1, eps_all[i*dsave + k], gp, lp, ymws)
|
||||
end
|
||||
bflw_step!(psi, U, eps_all[i*dsave + j], int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
end
|
||||
end
|
||||
|
||||
@timeit "CPU to GPU" copyto!(U,U0)
|
||||
|
||||
for j in dsave:-1:1
|
||||
@timeit "CPU to GPU" copyto!(U,U0)
|
||||
for k in 1:j-1
|
||||
flw(U, int, 1, eps_all[k], gp, lp, ymws)
|
||||
end
|
||||
bflw_step!(psi, U, eps_all[j], int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
end
|
||||
|
||||
@timeit "CPU to GPU" copyto!(U,U0)
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
function bflw_step!(U, psi, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
Performs ONE backstep in psi, from t to t-\eps. U is supposed to be the one in t-\eps and is left unchanged. So far, int has to be rk4
|
||||
"""
|
||||
function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace)
|
||||
|
||||
@timeit "Backflow step" begin
|
||||
|
||||
V = copy(U)
|
||||
V .= U
|
||||
|
||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||
|
||||
if int.add_zth
|
||||
add_zth_term(ymws::YMworkspace, U, lp)
|
||||
end
|
||||
|
||||
ymws.mom .= ymws.frc1
|
||||
U .= expm.(U, ymws.mom, 2*eps*int.r)
|
||||
|
||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||
|
||||
if int.add_zth
|
||||
add_zth_term(ymws::YMworkspace, U, lp)
|
||||
end
|
||||
|
||||
ymws.mom .= int.e0[1].*ymws.mom .+ int.e1[1].*ymws.frc1
|
||||
U .= expm.(U, ymws.mom, 2*eps)
|
||||
|
||||
Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp)
|
||||
|
||||
U .= V
|
||||
|
||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||
|
||||
if int.add_zth
|
||||
add_zth_term(ymws::YMworkspace, U, lp)
|
||||
end
|
||||
|
||||
U .= expm.(U, ymws.frc1, 2*eps*int.r)
|
||||
|
||||
Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp)
|
||||
dws.sAp .= psi + (8/9)*dws.sAp
|
||||
|
||||
U .= V
|
||||
|
||||
Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp)
|
||||
psi .= (1/4)*psi + dws.sp + dws.sAp
|
||||
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
"""
|
||||
|
||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||
|
||||
Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
|
||||
|
||||
"""
|
||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
@timeit "Laplacian" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_Nablanabla(so, U, si, dpar.th, lp)
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
|
||||
function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = -4*si[b,r]
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
if (point_time((b,r),lp) != 1)
|
||||
|
||||
so[b,r] = -4*si[b,r]
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
so[b,r] += 0.5*( th[1] * (U[b,1,r]*si[bu1,ru1]) +conj(th[1]) * (U[bd1,1,rd1]\si[bd1,rd1]) +
|
||||
th[2] * (U[b,2,r]*si[bu2,ru2]) +conj(th[2]) * (U[bd2,2,rd2]\si[bd2,rd2]) +
|
||||
th[3] * (U[b,3,r]*si[bu3,ru3]) +conj(th[3]) * (U[bd3,3,rd3]\si[bd3,rd3]) +
|
||||
th[4] * (U[b,4,r]*si[bu4,ru4]) +conj(th[4]) * (U[bd4,4,rd4]\si[bd4,rd4]) )
|
||||
end
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
|
||||
function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T}
|
||||
|
||||
eps = epsini
|
||||
dt = tend
|
||||
nstp = 0
|
||||
eps_all = Vector{T}(undef,0)
|
||||
while true
|
||||
ns = convert(Int64, floor(dt/eps))
|
||||
if ns > 10
|
||||
flw(U, psi, int, 9, eps, gp, dpar, lp, ymws, dws)
|
||||
ymws.U1 .= U
|
||||
flw(U, psi, int, 1, eps, gp, dpar, lp, ymws, dws)
|
||||
flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)
|
||||
|
||||
dt = dt - 10*eps
|
||||
nstp = nstp + 10
|
||||
push!(eps_all,ntuple(i->eps,10)...)
|
||||
|
||||
# adjust step size
|
||||
ymws.U1 .= ymws.U1 ./ U
|
||||
maxd = CUDA.mapreduce(dev_one, max, ymws.U1, init=zero(tend))
|
||||
eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3))
|
||||
|
||||
else
|
||||
flw(U, psi, int, ns, eps, gp, dpar, lp, ymws, dws)
|
||||
dt = dt - ns*eps
|
||||
|
||||
push!(eps_all,ntuple(i->eps,ns)...)
|
||||
push!(eps_all,dt)
|
||||
|
||||
flw(U, psi, int, 1, dt, gp, dpar, lp, ymws, dws)
|
||||
dt = zero(tend)
|
||||
|
||||
nstp = nstp + ns + 1
|
||||
end
|
||||
|
||||
if dt == zero(tend)
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
return nstp, eps_all
|
||||
end
|
||||
flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, lp::SpaceParm, ymws::YMworkspace, dws::DiracWorkspace) where {NI,T} = flw_adapt(U, psi, int, tend, int.eps_ini, gp, dpar, lp, ymws, dws)
|
||||
|
||||
|
||||
|
||||
|
||||
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
||||
|
||||
|
||||
"""
|
||||
|
||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||
|
||||
Computes /`/` //slashed{D}^2 si /`/` ans stores it in `si`.
|
||||
|
||||
"""
|
||||
function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
@timeit "DwdagDw" begin
|
||||
|
||||
@timeit "g5Dslsh" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(dws.st, U, si, dpar.th, lp)
|
||||
end
|
||||
end
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "Dw_improvement" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(dws.st, dws.csw, dpar.csw, si, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@timeit "g5Dslsh" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh!(so, U, dws.st, dpar.th, lp)
|
||||
end
|
||||
end
|
||||
|
||||
if abs(dpar.csw) > 1.0E-10
|
||||
@timeit "Dw_improvement" begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_g5Dslsh_impr!(so, dws.csw, dpar.csw, dws.st, lp)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) != 1)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = 4*si[b,r]
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||
end
|
||||
end
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
||||
|
||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
@inbounds begin
|
||||
|
||||
so[b,r] = 4*si[b,r]
|
||||
|
||||
bu1, ru1 = up((b,r), 1, lp)
|
||||
bd1, rd1 = dw((b,r), 1, lp)
|
||||
bu2, ru2 = up((b,r), 2, lp)
|
||||
bd2, rd2 = dw((b,r), 2, lp)
|
||||
bu3, ru3 = up((b,r), 3, lp)
|
||||
bd3, rd3 = dw((b,r), 3, lp)
|
||||
bu4, ru4 = up((b,r), 4, lp)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
|
||||
so[b,r] -= 0.5*(th[1]*gpmul(Pgamma{1,-1},U[b,1,r],si[bu1,ru1]) +conj(th[1])*gdagpmul(Pgamma{1,+1},U[bd1,1,rd1],si[bd1,rd1]) +
|
||||
th[2]*gpmul(Pgamma{2,-1},U[b,2,r],si[bu2,ru2]) +conj(th[2])*gdagpmul(Pgamma{2,+1},U[bd2,2,rd2],si[bd2,rd2]) +
|
||||
th[3]*gpmul(Pgamma{3,-1},U[b,3,r],si[bu3,ru3]) +conj(th[3])*gdagpmul(Pgamma{3,+1},U[bd3,3,rd3],si[bd3,rd3]) +
|
||||
th[4]*gpmul(Pgamma{4,-1},U[b,4,r],si[bu4,ru4]) +conj(th[4])*gdagpmul(Pgamma{4,+1},U[bd4,4,rd4],si[bd4,rd4]) )
|
||||
|
||||
so[b,r] = dmul(Gamma{5}, so[b,r])
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,D}
|
||||
|
||||
@inbounds begin
|
||||
|
||||
b = Int64(CUDA.threadIdx().x);
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
||||
|
||||
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||
|
||||
@inbounds begin
|
||||
|
||||
b = Int64(CUDA.threadIdx().x);
|
||||
r = Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) != 1)
|
||||
|
||||
so[b,r] += 0.5*csw*im*dmul(Gamma{5},( Fcsw[b,1,r]*dmul(Gamma{10},si[b,r]) + Fcsw[b,2,r]*dmul(Gamma{11},si[b,r]) + Fcsw[b,3,r]*dmul(Gamma{12},si[b,r])
|
||||
-Fcsw[b,4,r]*dmul(Gamma{15},si[b,r]) - Fcsw[b,5,r]*dmul(Gamma{14},si[b,r]) - Fcsw[b,6,r]*dmul(Gamma{13},si[b,r])))
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
end
|
|
@ -60,6 +60,7 @@ using .Dirac
|
|||
export DiracWorkspace, DiracParam
|
||||
export Dw!, g5Dw!, DwdagDw!, SF_bndfix!, Csw!, pfrandomize!, mtwmdpar
|
||||
export read_prop, save_prop, read_dpar
|
||||
export Nablanabla!, flw, backflow
|
||||
|
||||
include("Solvers/Solvers.jl")
|
||||
using .Solvers
|
||||
|
|
|
@ -17,17 +17,21 @@ function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Space
|
|||
return nothing
|
||||
end
|
||||
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
@timeit "Propagator computation" begin
|
||||
|
||||
CUDA.@allowscalar dws.sp[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
CUDA.@allowscalar dws.sp[point_index(CartesianIndex{lp.ndim}(y),lp)...] = Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return niter
|
||||
end
|
||||
|
||||
|
@ -40,15 +44,20 @@ function propagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Space
|
|||
return nothing
|
||||
end
|
||||
|
||||
pfrandomize!(dws.sp,lp,time)
|
||||
@timeit "Propagator computation" begin
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
pfrandomize!(dws.sp,lp,time)
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return niter
|
||||
end
|
||||
|
||||
|
@ -74,27 +83,30 @@ function bndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::Sp
|
|||
r=Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) == 2)
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
src[b,r] = gdagpmul(Pgamma{4,1},U[bd4,4,rd4],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
bd4, rd4 = dw((b,r), 4, lp)
|
||||
src[b,r] = gdagpmul(Pgamma{4,1},U[bd4,4,rd4],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
SF_bndfix!(pro,lp)
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
@timeit "Propagator computation" begin
|
||||
SF_bndfix!(pro,lp)
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return niter
|
||||
end
|
||||
|
||||
|
@ -120,26 +132,28 @@ function Tbndpropagator!(pro, U, dpar::DiracParam{T}, dws::DiracWorkspace, lp::S
|
|||
r=Int64(CUDA.blockIdx().x)
|
||||
|
||||
if (point_time((b,r),lp) == lp.iL[end])
|
||||
src[b,r] = gpmul(Pgamma{4,-1},U[b,4,r],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
src[b,r] = gpmul(Pgamma{4,-1},U[b,4,r],Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4)))/2
|
||||
end
|
||||
|
||||
return nothing
|
||||
end
|
||||
|
||||
SF_bndfix!(pro,lp)
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
@timeit "Propagator computation" begin
|
||||
fill!(dws.sp,zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
end
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_assign_bndsrc!(dws.sp, U, lp, c, s)
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(dws.sp)
|
||||
end
|
||||
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
end
|
||||
|
||||
g5Dw!(pro,U,dpar.ct*dws.sp,mtwmdpar(dpar),dws,lp)
|
||||
|
||||
niter = CG!(pro,U,DwdagDw!,dpar,lp,dws,maxiter,tol)
|
||||
return niter
|
||||
end
|
||||
|
||||
|
|
|
@ -204,19 +204,21 @@ Integrates the flow equations with the integration scheme defined by `int` using
|
|||
"""
|
||||
function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T}
|
||||
|
||||
eps = int.eps_ini
|
||||
eps = epsini
|
||||
dt = tend
|
||||
nstp = 0
|
||||
eps_all = Vector{T}(undef,0)
|
||||
while true
|
||||
ns = convert(Int64, floor(dt/eps))
|
||||
if ns > 10
|
||||
flw(U, int, 9, eps, gp, lp, ymws)
|
||||
ymws.U1 .= U
|
||||
flw(U, int, 2, eps/2, gp, lp, ymws)
|
||||
flw(ymws.U1, int, 1, eps, gp, lp, ymws)
|
||||
flw(U, int, 1, eps, gp, lp, ymws)
|
||||
flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)
|
||||
|
||||
dt = dt - 10*eps
|
||||
nstp = nstp + 10
|
||||
push!(eps_all,ntuple(i->eps,10)...)
|
||||
|
||||
# adjust step size
|
||||
ymws.U1 .= ymws.U1 ./ U
|
||||
|
@ -227,6 +229,9 @@ function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp
|
|||
flw(U, int, ns, eps, gp, lp, ymws)
|
||||
dt = dt - ns*eps
|
||||
|
||||
push!(eps_all,ntuple(i->eps,ns)...)
|
||||
push!(eps_all,dt)
|
||||
|
||||
flw(U, int, 1, dt, gp, lp, ymws)
|
||||
dt = zero(tend)
|
||||
|
||||
|
@ -238,7 +243,7 @@ function flw_adapt(U, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugeParm, lp
|
|||
end
|
||||
end
|
||||
|
||||
return nstp, eps
|
||||
return nstp, eps_all
|
||||
end
|
||||
flw_adapt(U, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, lp::SpaceParm, ymws::YMworkspace) where {NI,T} = flw_adapt(U, int, tend, int.eps_ini, gp, lp, ymws)
|
||||
|
||||
|
|
42
test/dirac/test_backflow.jl
Normal file
42
test/dirac/test_backflow.jl
Normal file
|
@ -0,0 +1,42 @@
|
|||
using CUDA
|
||||
|
||||
using Pkg
|
||||
|
||||
Pkg.activate("/home/fperez/Git/LGPU_fork_ferflow")
|
||||
|
||||
using LatticeGPU
|
||||
|
||||
lp = SpaceParm{4}((4,4,4,4),(2,2,2,2),0,(0,0,0,0,0,0));
|
||||
|
||||
pso = scalar_field(Spinor{4,SU3fund{Float64}},lp);
|
||||
psi = scalar_field(Spinor{4,SU3fund{Float64}},lp);
|
||||
psi2 = scalar_field(Spinor{4,SU3fund{Float64}},lp);
|
||||
|
||||
ymws = YMworkspace(SU3,Float64,lp);
|
||||
dws = DiracWorkspace(SU3fund,Float64,lp);
|
||||
|
||||
int = wfl_rk3(Float64, 0.01, 1.0)
|
||||
|
||||
gp = GaugeParm{Float64}(SU3{Float64},6.0,1.0,(1.0,0.0),(0.0,0.0),lp.iL)
|
||||
|
||||
dpar = DiracParam{Float64}(SU3fund,1.3,0.9,(1.0,1.0,1.0,1.0),0.0)
|
||||
|
||||
randomize!(ymws.mom, lp, ymws)
|
||||
U = exp.(ymws.mom);
|
||||
|
||||
pfrandomize!(psi,lp)
|
||||
for L in 4:19
|
||||
pso .= psi
|
||||
V = Array(U)
|
||||
a,b = flw_adapt(U, psi, int, L*int.eps, gp,dpar, lp, ymws,dws)
|
||||
# for i in 1:a
|
||||
# flw(U, psi, int, 1 ,b[i], gp, dpar, lp, ymws, dws)
|
||||
# end
|
||||
pfrandomize!(psi2,lp)
|
||||
|
||||
foo = sum(dot.(psi,psi2))# field_dot(psi,psi2,sumf,lp)
|
||||
copyto!(U,V);
|
||||
backflow(psi2,U,L*int.eps,7,gp,dpar,lp, ymws,dws)
|
||||
println("Error:",(sum(dot.(pso,psi2))-foo)/foo)
|
||||
psi .= pso
|
||||
end
|
119
test/dirac/test_backflow_tl.jl
Normal file
119
test/dirac/test_backflow_tl.jl
Normal file
|
@ -0,0 +1,119 @@
|
|||
using LatticeGPU, CUDA, TimerOutputs
|
||||
|
||||
#Test for the relation K(t,y;0,n)^+ Dw(n|m)^{-1} e^(ipm) = D(p)^{-1} exp(4t sin^2(p/2)) e^{ipn} with a given momenta (if p=0 its randomized), spin and color
|
||||
#Kernel en 1207.2096
|
||||
|
||||
|
||||
@timeit "Plw backflow test" begin
|
||||
|
||||
function Dwpw_test(;p=0,s=1,c=1)
|
||||
lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0))
|
||||
gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
|
||||
dpar = DiracParam{Float64}(SU3fund,1.3,0.0,(1.0,1.0,1.0,1.0),0.0)
|
||||
dws = DiracWorkspace(SU3fund,Float64,lp);
|
||||
ymws = YMworkspace(SU3,Float64,lp);
|
||||
|
||||
p==0 ? p = Int.(round.(lp.iL.*rand(4),RoundUp)) : nothing
|
||||
U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}))
|
||||
|
||||
rm = 2* pi* p./(lp.iL)
|
||||
rmom=(rm[1],rm[2],rm[3],rm[4])
|
||||
|
||||
int = wfl_rk3(Float64, 0.01, 1.0)
|
||||
Nsteps = 15
|
||||
|
||||
@timeit "Generate plane wave" begin
|
||||
|
||||
pwave = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
prop = scalar_field(Spinor{4,SU3fund{Float64}},lp)
|
||||
prop_th = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
|
||||
#Generate plane wave
|
||||
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar pwave[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
|
||||
end end end end
|
||||
|
||||
end
|
||||
|
||||
@timeit "Generate analitical solution" begin
|
||||
|
||||
#Th solution
|
||||
|
||||
if s == 1
|
||||
vals = (dpar.m0 + 4.0 - sum(cos.(rmom)),0.0,im*sin(rmom[4])+sin(rmom[3]),im*sin(rmom[2])+sin(rmom[1]))
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
elseif s == 2
|
||||
vals = (0.0,dpar.m0 + 4.0 - sum(cos.(rmom)),sin(rmom[1]) - im *sin(rmom[2]),-sin(rmom[3])+im*sin(rmom[4]))
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
elseif s == 3
|
||||
vals = (-sin(rmom[3])+im*sin(rmom[4]),-sin(rmom[1])-im*sin(rmom[2]),dpar.m0 + 4.0 - sum(cos.(rmom)),0.0)
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
else
|
||||
vals = (-sin(rmom[1])+im*sin(rmom[2]),sin(rmom[3])+im*sin(rmom[4]),0.0,dpar.m0 + 4.0 - sum(cos.(rmom)))
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
prop_th .= exp(-4*Nsteps*int.eps*sum(sin.(rmom./2).^2))*prop_th
|
||||
|
||||
|
||||
#compute Sum{x} D^-1(x|y)P(y)
|
||||
|
||||
@timeit "Solving propagator and flowing" begin
|
||||
|
||||
function krnlg5!(src)
|
||||
b=Int64(CUDA.threadIdx().x)
|
||||
r=Int64(CUDA.blockIdx().x)
|
||||
src[b,r] = dmul(Gamma{5},src[b,r])
|
||||
return nothing
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(pwave)
|
||||
end
|
||||
g5Dw!(prop,U,pwave,dpar,dws,lp)
|
||||
CG!(prop,U,DwdagDw!,dpar,lp,dws,10000,1.0e-14)
|
||||
|
||||
for _ in 1:Nsteps
|
||||
backflow(U,prop,1,int.eps,gp,dpar,lp, ymws,dws)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
dif = sum(norm2.(prop - prop_th))
|
||||
|
||||
return dif
|
||||
end
|
||||
|
||||
|
||||
|
||||
begin
|
||||
dif = 0.0
|
||||
for i in 1:3 for j in 1:4
|
||||
dif += Dwpw_test(c=i,s=j)
|
||||
end end
|
||||
|
||||
if dif < 1.0e-5
|
||||
print("Backflow_tl test passed with average error ", dif/12,"!\n")
|
||||
else
|
||||
error("Backflow_tl test failed with difference: ",dif,"\n")
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
end
|
119
test/dirac/test_flow_tl.jl
Normal file
119
test/dirac/test_flow_tl.jl
Normal file
|
@ -0,0 +1,119 @@
|
|||
using LatticeGPU, CUDA, TimerOutputs
|
||||
|
||||
#Test for the relation K(t,y;0,n) Dw(n|m)^{-1} e^(ipm) = D(p)^{-1} exp(-4t sin^2(p/2)) e^{ipn} with a given momenta (if p=0 its randomized), spin and color
|
||||
#Kernel en 1207.2096
|
||||
|
||||
|
||||
@timeit "Plw flow test" begin
|
||||
|
||||
function Dwpw_test(;p=0,s=1,c=1)
|
||||
lp = SpaceParm{4}((16,16,16,16), (4,4,4,4), 0, (0,0,0,0,0,0))
|
||||
gp = GaugeParm{Float64}(SU3{Float64}, 6.0, 1.0)
|
||||
dpar = DiracParam{Float64}(SU3fund,1.3,0.0,(1.0,1.0,1.0,1.0),0.0)
|
||||
dws = DiracWorkspace(SU3fund,Float64,lp);
|
||||
ymws = YMworkspace(SU3,Float64,lp);
|
||||
|
||||
p==0 ? p = Int.(round.(lp.iL.*rand(4),RoundUp)) : nothing
|
||||
U = fill!(vector_field(SU3{Float64},lp),one(SU3{Float64}))
|
||||
|
||||
rm = 2* pi* p./(lp.iL)
|
||||
rmom=(rm[1],rm[2],rm[3],rm[4])
|
||||
|
||||
int = wfl_rk3(Float64, 0.01, 1.0)
|
||||
Nsteps = 15
|
||||
|
||||
@timeit "Generate plane wave" begin
|
||||
|
||||
pwave = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
prop = scalar_field(Spinor{4,SU3fund{Float64}},lp)
|
||||
prop_th = fill!(scalar_field(Spinor{4,SU3fund{Float64}},lp),zero(eltype(scalar_field(Spinor{4,SU3fund{Float64}},lp))))
|
||||
|
||||
|
||||
#Generate plane wave
|
||||
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar pwave[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*Spinor{4,SU3fund{Float64}}(ntuple(i -> (i==s)*SU3fund{Float64}(ntuple(j -> (j==c)*1.0,3)...),4))
|
||||
end end end end
|
||||
|
||||
end
|
||||
|
||||
@timeit "Generate analitical solution" begin
|
||||
|
||||
#Th solution
|
||||
|
||||
if s == 1
|
||||
vals = (dpar.m0 + 4.0 - sum(cos.(rmom)),0.0,im*sin(rmom[4])+sin(rmom[3]),im*sin(rmom[2])+sin(rmom[1]))
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
elseif s == 2
|
||||
vals = (0.0,dpar.m0 + 4.0 - sum(cos.(rmom)),sin(rmom[1]) - im *sin(rmom[2]),-sin(rmom[3])+im*sin(rmom[4]))
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
elseif s == 3
|
||||
vals = (-sin(rmom[3])+im*sin(rmom[4]),-sin(rmom[1])-im*sin(rmom[2]),dpar.m0 + 4.0 - sum(cos.(rmom)),0.0)
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
else
|
||||
vals = (-sin(rmom[1])+im*sin(rmom[2]),sin(rmom[3])+im*sin(rmom[4]),0.0,dpar.m0 + 4.0 - sum(cos.(rmom)))
|
||||
for x in 1:lp.iL[1] for y in 1:lp.iL[2] for z in 1:lp.iL[3] for t in 1:lp.iL[4]
|
||||
CUDA.@allowscalar prop_th[point_index(CartesianIndex{lp.ndim}((x,y,z,t)),lp)...] = exp(im * (x*rmom[1] + y*rmom[2] + z*rmom[3] + t*rmom[4]))*
|
||||
( Spinor{4,SU3fund{Float64}}(ntuple(i -> SU3fund{Float64}(ntuple(j -> (j==c)*vals[i],3)...),4)) )/(sum((sin.(rmom)) .^2) + (dpar.m0+ 4.0 - sum(cos.(rmom)))^2)
|
||||
end end end end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
prop_th .= exp(-4*Nsteps*int.eps*sum(sin.(rmom./2).^2))*prop_th
|
||||
|
||||
|
||||
#compute Sum{x} D^-1(x|y)P(y)
|
||||
|
||||
@timeit "Solving propagator and flowing" begin
|
||||
|
||||
function krnlg5!(src)
|
||||
b=Int64(CUDA.threadIdx().x)
|
||||
r=Int64(CUDA.blockIdx().x)
|
||||
src[b,r] = dmul(Gamma{5},src[b,r])
|
||||
return nothing
|
||||
end
|
||||
|
||||
CUDA.@sync begin
|
||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnlg5!(pwave)
|
||||
end
|
||||
g5Dw!(prop,U,pwave,dpar,dws,lp)
|
||||
CG!(prop,U,DwdagDw!,dpar,lp,dws,10000,1.0e-14)
|
||||
|
||||
flw(U, prop, int, Nsteps ,int.eps, gp, dpar, lp, ymws, dws)
|
||||
end
|
||||
|
||||
|
||||
dif = sum(norm2.(prop - prop_th))
|
||||
|
||||
|
||||
return dif
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
begin
|
||||
dif = 0.0
|
||||
for i in 1:3 for j in 1:4
|
||||
dif += Dwpw_test(c=i,s=j)
|
||||
end end
|
||||
|
||||
if dif < 1.0e-4
|
||||
print("Flow_tl test passed with average error ", dif/12,"!\n")
|
||||
else
|
||||
error("Flow_tl test failed with difference: ",dif,"\n")
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
end
|
Loading…
Add table
Add a link
Reference in a new issue