Memory optimization in the backflow step (bflw_step!). Adaptive flow step size now also accounts for the fermion field.

This commit is contained in:
Fernando P.Panadero 2024-09-09 15:37:42 +02:00
parent 5fc81739f8
commit 6f1c8a08fd

View file

@ -108,8 +108,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
@timeit "Backflow step" begin
V = copy(U)
V .= U
@timeit "GPU to CPU" V = Array(U)
force_gauge(ymws, U, int.c0, 1, gp, lp)
@ -131,7 +130,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp)
U .= V
@timeit "CPU to GPU" copyto!(U,V)
force_gauge(ymws, U, int.c0, 1, gp, lp)
@ -144,7 +143,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp)
dws.sAp .= psi + (8/9)*dws.sAp
U .= V
@timeit "CPU to GPU" copyto!(U,V)
Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp)
psi .= (1/4)*psi + dws.sp + dws.sAp
@ -166,8 +165,9 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar
if ns > 10
flw(U, psi, int, 9, eps, gp, dpar, lp, ymws, dws)
ymws.U1 .= U
dws.sr .= psi
flw(U, psi, int, 1, eps, gp, dpar, lp, ymws, dws)
flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)
flw(ymws.U1,dws.sr, int, 2, eps/2, gp, dpar,lp, ymws,dws)
dt = dt - 10*eps
nstp = nstp + 10
@ -175,8 +175,10 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar
# adjust step size
ymws.U1 .= ymws.U1 ./ U
dws.sr .= dws.sr .- psi
maxd = CUDA.mapreduce(dev_one, max, ymws.U1, init=zero(tend))
eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3))
pfdist = sqrt(CUDA.mapreduce(norm2, max, dws.sr, init=zero(tend)))
eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3),int.sft_fac*eps*(int.tol/pfdist)^(one(tend)/3))
else
flw(U, psi, int, ns, eps, gp, dpar, lp, ymws, dws)
@ -205,7 +207,7 @@ flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam,
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
Computes ``\\nabla^* \\nabla`` `si` and stores it in `si`.
Computes ``\\nabla^* \\nabla`` `si` and stores it in `so`.
"""
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
@ -216,6 +218,7 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
end
return nothing
end
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
SF_bndfix!(si,lp)
@timeit "Laplacian" begin
@ -238,7 +241,7 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
so[b,r] = -4*si[b,r]
bu1, ru1 = up((b,r), 1, lp)
bu1, ru1 = up((b,r), 1, lp)
bd1, rd1 = dw((b,r), 1, lp)
bu2, ru2 = up((b,r), 2, lp)
bd2, rd2 = dw((b,r), 2, lp)
@ -313,7 +316,6 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
end
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
@ -362,7 +364,6 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
return nothing
end
function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@ -393,7 +394,6 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
return nothing
end
function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
@ -436,8 +436,6 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
return nothing
end
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
@inbounds begin