mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
Memory optimization in bfl. Fermion adaptive step.
This commit is contained in:
parent
5fc81739f8
commit
6f1c8a08fd
1 changed file with 11 additions and 13 deletions
|
@ -108,8 +108,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
||||||
|
|
||||||
@timeit "Backflow step" begin
|
@timeit "Backflow step" begin
|
||||||
|
|
||||||
V = copy(U)
|
@timeit "GPU to CPU" V = Array(U)
|
||||||
V .= U
|
|
||||||
|
|
||||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||||
|
|
||||||
|
@ -131,7 +130,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
||||||
|
|
||||||
Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp)
|
Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp)
|
||||||
|
|
||||||
U .= V
|
@timeit "CPU to GPU" copyto!(U,V)
|
||||||
|
|
||||||
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
force_gauge(ymws, U, int.c0, 1, gp, lp)
|
||||||
|
|
||||||
|
@ -144,7 +143,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam
|
||||||
Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp)
|
Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp)
|
||||||
dws.sAp .= psi + (8/9)*dws.sAp
|
dws.sAp .= psi + (8/9)*dws.sAp
|
||||||
|
|
||||||
U .= V
|
@timeit "CPU to GPU" copyto!(U,V)
|
||||||
|
|
||||||
Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp)
|
Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp)
|
||||||
psi .= (1/4)*psi + dws.sp + dws.sAp
|
psi .= (1/4)*psi + dws.sp + dws.sAp
|
||||||
|
@ -166,8 +165,9 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar
|
||||||
if ns > 10
|
if ns > 10
|
||||||
flw(U, psi, int, 9, eps, gp, dpar, lp, ymws, dws)
|
flw(U, psi, int, 9, eps, gp, dpar, lp, ymws, dws)
|
||||||
ymws.U1 .= U
|
ymws.U1 .= U
|
||||||
|
dws.sr .= psi
|
||||||
flw(U, psi, int, 1, eps, gp, dpar, lp, ymws, dws)
|
flw(U, psi, int, 1, eps, gp, dpar, lp, ymws, dws)
|
||||||
flw(ymws.U1, int, 2, eps/2, gp, lp, ymws)
|
flw(ymws.U1,dws.sr, int, 2, eps/2, gp, dpar,lp, ymws,dws)
|
||||||
|
|
||||||
dt = dt - 10*eps
|
dt = dt - 10*eps
|
||||||
nstp = nstp + 10
|
nstp = nstp + 10
|
||||||
|
@ -175,8 +175,10 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar
|
||||||
|
|
||||||
# adjust step size
|
# adjust step size
|
||||||
ymws.U1 .= ymws.U1 ./ U
|
ymws.U1 .= ymws.U1 ./ U
|
||||||
|
dws.sr .= dws.sr .- psi
|
||||||
maxd = CUDA.mapreduce(dev_one, max, ymws.U1, init=zero(tend))
|
maxd = CUDA.mapreduce(dev_one, max, ymws.U1, init=zero(tend))
|
||||||
eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3))
|
pfdist = sqrt(CUDA.mapreduce(norm2, max, dws.sr, init=zero(tend)))
|
||||||
|
eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3),int.sft_fac*eps*(int.tol/pfdist)^(one(tend)/3))
|
||||||
|
|
||||||
else
|
else
|
||||||
flw(U, psi, int, ns, eps, gp, dpar, lp, ymws, dws)
|
flw(U, psi, int, ns, eps, gp, dpar, lp, ymws, dws)
|
||||||
|
@ -205,7 +207,7 @@ flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam,
|
||||||
|
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D})
|
||||||
|
|
||||||
Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`.
|
Computes ``\\nabla^* \\nabla`` `si` and stores it in `so`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D}
|
||||||
|
@ -216,6 +218,7 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space
|
||||||
end
|
end
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
|
function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D}
|
||||||
SF_bndfix!(si,lp)
|
SF_bndfix!(si,lp)
|
||||||
@timeit "Laplacian" begin
|
@timeit "Laplacian" begin
|
||||||
|
@ -238,7 +241,7 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D}
|
||||||
|
|
||||||
so[b,r] = -4*si[b,r]
|
so[b,r] = -4*si[b,r]
|
||||||
|
|
||||||
bu1, ru1 = up((b,r), 1, lp)
|
bu1, ru1 = up((b,r), 1, lp)
|
||||||
bd1, rd1 = dw((b,r), 1, lp)
|
bd1, rd1 = dw((b,r), 1, lp)
|
||||||
bu2, ru2 = up((b,r), 2, lp)
|
bu2, ru2 = up((b,r), 2, lp)
|
||||||
bd2, rd2 = dw((b,r), 2, lp)
|
bd2, rd2 = dw((b,r), 2, lp)
|
||||||
|
@ -313,7 +316,6 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
export Nablanabla!, flw, backflow, flw_adapt, bflw_step!
|
||||||
|
|
||||||
|
|
||||||
|
@ -362,7 +364,6 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -393,7 +394,6 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B}
|
||||||
|
|
||||||
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x)
|
||||||
|
@ -436,8 +436,6 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B,
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D}
|
||||||
|
|
||||||
@inbounds begin
|
@inbounds begin
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue