diff --git a/src/Dirac/Diracflow.jl b/src/Dirac/Diracflow.jl index 8065840..62529e2 100644 --- a/src/Dirac/Diracflow.jl +++ b/src/Dirac/Diracflow.jl @@ -108,8 +108,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam @timeit "Backflow step" begin - V = copy(U) - V .= U + @timeit "GPU to CPU" V = Array(U) force_gauge(ymws, U, int.c0, 1, gp, lp) @@ -131,7 +130,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam Nablanabla!(dws.sp, U, 0.75*2*eps*psi, dpar, dws, lp) - U .= V + @timeit "CPU to GPU" copyto!(U,V) force_gauge(ymws, U, int.c0, 1, gp, lp) @@ -144,7 +143,7 @@ function bflw_step!(psi, U, eps, int::FlowIntr, gp::GaugeParm, dpar::DiracParam Nablanabla!(dws.sAp, U, 2*eps*dws.sp, dpar, dws, lp) dws.sAp .= psi + (8/9)*dws.sAp - U .= V + @timeit "CPU to GPU" copyto!(U,V) Nablanabla!(psi, U, 2*eps*(dws.sAp - (8/9)*dws.sp), dpar, dws, lp) psi .= (1/4)*psi + dws.sp + dws.sAp @@ -166,8 +165,9 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar if ns > 10 flw(U, psi, int, 9, eps, gp, dpar, lp, ymws, dws) ymws.U1 .= U + dws.sr .= psi flw(U, psi, int, 1, eps, gp, dpar, lp, ymws, dws) - flw(ymws.U1, int, 2, eps/2, gp, lp, ymws) + flw(ymws.U1,dws.sr, int, 2, eps/2, gp, dpar,lp, ymws,dws) dt = dt - 10*eps nstp = nstp + 10 @@ -175,8 +175,10 @@ function flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, epsini::T, gp::GaugePar # adjust step size ymws.U1 .= ymws.U1 ./ U + dws.sr .= dws.sr .- psi maxd = CUDA.mapreduce(dev_one, max, ymws.U1, init=zero(tend)) - eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3)) + pfdist = sqrt(CUDA.mapreduce(norm2, max, dws.sr, init=zero(tend))) + eps = min(int.max_eps, 2*eps, int.sft_fac*eps*(int.tol/maxd)^(one(tend)/3),int.sft_fac*eps*(int.tol/pfdist)^(one(tend)/3)) else flw(U, psi, int, ns, eps, gp, dpar, lp, ymws, dws) @@ -205,7 +207,7 @@ flw_adapt(U, psi, int::FlowIntr{NI,T}, tend::T, gp::GaugeParm, dpar::DiracParam, function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,B,D}) -Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `si`. +Computes /`/` \\nabla^* \\nabla /`/` `si` and stores it in `so`. """ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceParm{4,6,BC_PERIODIC,D}) where {D} @@ -216,6 +218,7 @@ function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Space end return nothing end + function Nablanabla!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D},SpaceParm{4,6,BC_OPEN,D}}) where {D} SF_bndfix!(si,lp) @timeit "Laplacian" begin @@ -238,7 +241,7 @@ function krnl_Nablanabla(so, U, si, th, lp::SpaceParm{4,6,BC_OPEN,D}) where {D} so[b,r] = -4*si[b,r] - bu1, ru1 = up((b,r), 1, lp) + bu1, ru1 = up((b,r), 1, lp) bd1, rd1 = dw((b,r), 1, lp) bu2, ru2 = up((b,r), 2, lp) bd2, rd2 = dw((b,r), 2, lp) @@ -313,7 +316,6 @@ function krnl_Nablanabla(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Sp end - export Nablanabla!, flw, backflow, flw_adapt, bflw_step! @@ -362,7 +364,6 @@ function Dslash_sq!(so, U, si, dpar::DiracParam, dws::DiracWorkspace, lp::SpaceP return nothing end - function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) @@ -393,7 +394,6 @@ function krnl_g5Dslsh!(so, U, si, th, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},Spac return nothing end - function krnl_g5Dslsh!(so, U, si, th, lp::SpaceParm{4,6,B,D}) where {D,B} b = Int64(CUDA.threadIdx().x); r = Int64(CUDA.blockIdx().x) @@ -436,8 +436,6 @@ function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::SpaceParm{4,6,B,D}) where {B, return nothing end - - function krnl_g5Dslsh_impr!(so, Fcsw, csw, si, lp::Union{SpaceParm{4,6,BC_SF_ORBI,D},SpaceParm{4,6,BC_SF_AFWB,D}}) where {D} @inbounds begin