From 48b3bf45378b961e378b913375ac2c7f317e2194 Mon Sep 17 00:00:00 2001 From: "Fernando P.Panadero" Date: Thu, 16 May 2024 14:43:29 +0200 Subject: [PATCH] YM OBC first test --- src/YM/YMact.jl | 686 ++++++++++++++++++++++++++++++++++++++++++----- src/YM/YMflow.jl | 42 ++- 2 files changed, 650 insertions(+), 78 deletions(-) diff --git a/src/YM/YMact.jl b/src/YM/YMact.jl index 35e965b..e0ec614 100644 --- a/src/YM/YMact.jl +++ b/src/YM/YMact.jl @@ -9,7 +9,314 @@ ### created: Mon Jul 12 18:31:19 2021 ### -function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,NB,N,M,B,D} + +## +## OPEN DEBERIA ESTAR BIEN LAS ACCIONES, CHECKEAR. LAS FUERZAS FALTAN +## +function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,NB,N,M,D} + + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + ipl = 0 + S = zero(eltype(plx)) + @inbounds begin + for id1 in N:-1:1 + bu1, ru1 = up((b, r), id1, lp) + TOBC = (id1==N) + + for id2 = 1:id1-1 + bu2, ru2 = up((b, r), id2, lp) + ipl = ipl + 1 + + TWP = (I[id1]==1) && (I[id2]==1) + TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) ) + TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) ) + + # H2 staple + (b1, r1) = up((b,r), id1, lp) + (b2, r2) = up((b1,r1), id1, lp) + gb = U[b2,id2,r2] + + (b2, r2) = up((b1,r1), id2, lp) + h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2] + + # H3 staple + (b1, r1) = up((b,r), id2, lp) + (b2, r2) = up((b1,r1), id2, lp) + + (b3, r3) = up((b1,r1), id1, lp) + + gc = U[b3,id2,r3] + + h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc + # END staples + + ga = U[bu1,id2,ru1] + + g2 = U[b,id2,r]\U[b,id1,r] + + if ( (it == lp.iL[end]) || (it == 1) ) && !TOBC + S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2]) + c1*tr(g2*ga/h3) + c1*tr(g2*h2/U[bu2,id1,ru2])) + elseif (it == lp.iL[end]-1) && TOBC + S += c0*tr(g2*ga/U[bu2,id1,ru2]) + c1*tr(g2*ga/h3) + elseif (it == lp.iL[end]) && TOBC + nothing + else + if TWP + S += (ztw[ipl]*c0)*tr(g2*ga/U[bu2,id1,ru2]) + else + S += c0*tr(g2*ga/U[bu2,id1,ru2]) + end + if TWH2 + S += (ztw[ipl]*c1)*tr(g2*h2/U[bu2,id1,ru2]) + else + S += c1*tr(g2*h2/U[bu2,id1,ru2]) + end + if TWH3 + S += (ztw[ipl]*c1)*tr(g2*ga/h3) + else + S += c1*tr(g2*ga/h3) + end + end + + end + end + + plx[I] = S + end + + return nothing +end + +function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D} + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + S = zero(eltype(plx)) + ipl = 0 + for id1 in N:-1:1 + bu1, ru1 = up((b, r), id1, lp) + TOBC = (id1==N) + + for id2 = 1:id1-1 + bu2, ru2 = up((b, r), id2, lp) + ipl = ipl + 1 + TWP = (I[id1]==1) && (I[id2]==1) + + gt1 = U[bu1,id2,ru1] + + if ( (it == lp.iL[end]) || (it == 1)) && !TOBC + S += 0.5*cG*(c0*tr(g2*ga/U[bu2,id1,ru2])) + elseif (it == lp.iL[end]) && TOBC + nothing + else + if TWP + S += ztw[ipl]*tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])) + else + S += tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])) + end + end + end + end + + plx[I] = S + end + + return nothing +end + +function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D} + + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + @inbounds begin + id1, id2 = lp.plidx[ipl] + bu1, ru1 = up((b, r), id1, lp) + bu2, ru2 = up((b, r), id2, lp) + TWP = (I[id1]==1)&&(I[id2]==1) + + TOBC = (id1 == N) + + gt1 = U[bu1,id2,ru1] + + g1 = gt1/U[bu2,id1,ru2] + g2 = U[b,id2,r]\U[b,id1,r] + + if !TOBC && ( (it == 1) || (it == lp.iL[end]) ) + X = 0.5*cG*projalg(U[b,id1,r]*g1/U[b,id2,r]) + + frc1[b ,id1, r ] -= X + frc1[b ,id2, r ] += X + frc2[bu1,id2,ru1] -= 0.5*cG*projalg(g1*g2) + frc2[bu2,id1,ru2] += 0.5*cG*projalg(g2*g1) + elseif TOBC && (it == lp.iL[end]) + nothing + else + if TWP + X = projalg(ztw,U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(ztw,g1*g2) + frc2[bu2,id1,ru2] += projalg(ztw,g2*g1) + else + X = projalg(U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(g1*g2) + frc2[bu2,id1,ru2] += projalg(g2*g1) + end + frc1[b ,id1, r ] -= X + frc1[b ,id2, r ] += X + end + end + + return nothing +end + +function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_OPEN,D}) where {T,N,M,D} + + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + @inbounds begin + id1, id2 = lp.plidx[ipl] + bu1, ru1 = up((b, r), id1, lp) + bu2, ru2 = up((b, r), id2, lp) + + TOBC = (id1 == N) + TWP = (I[id1]==1) && (I[id2]==1) + TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) ) + TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) ) + TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) ) + TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) ) + + # H1 staple + (b1, r1) = dw((b,r), id2, lp) + (b2, r2) = up((b1,r1), id1, lp) + gc = U[b2,id2,r2] + h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc + + # H2 staple + (b1, r1) = up((b,r), id1, lp) + (b2, r2) = up((b1,r1), id1, lp) + gb = U[b2,id2,r2] + + (b2, r2) = up((b1,r1), id2, lp) + h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2] + + # H3 staple + (b1, r1) = up((b,r), id2, lp) + (b2, r2) = up((b1,r1), id2, lp) + (b3, r3) = up((b1,r1), id1, lp) + gc = U[b3,id2,r3] + h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc + + # H4 staple + (b1, r1) = dw((b,r), id1, lp) + (b2, r2) = up((b1,r1), id2, lp) + h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2] + # END staples + + ga = U[bu1,id2,ru1] + + g1 = ga/U[bu2,id1,ru2] + g2 = U[b,id2,r]\U[b,id1,r] + + if !TOBC && ( (it == 1) || (it == lp.iL[end]) ) + X = 0.5*cG*(c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) + c1*projalg(h1*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*g1/h4)) + + frc1[b,id1,r] -= X + frc1[b,id2,r] += X + frc2[bu1,id2,ru1] -= 0.5*cG*c0*projalg(g1*g2) + frc2[bu2,id1,ru2] += 0.5*cG*c0*projalg(g2*g1) + frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((g1/U[b,id2,r])*h1) + frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg((U[b,id2,r]\h1)*g1) + frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg(g2*h2/U[bu2,id1,ru2]) + frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((ga/h3)*g2) + frc2[bu1,id2,ru1] -= 0.5*cG*c1*projalg((g1/h4)*U[b,id1,r]) + frc2[bu2,id1,ru2] += 0.5*cG*c1*projalg(h4\U[b,id1,r]*g1) + elseif TOBC && (it == lp.iL[end]) + nothing + elseif TOBC && (it == (lp.iL[end]-1) ) + X = (c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + + c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) + c1*projalg(h1*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*g1/h4)) + + frc1[b,id1,r] -= X + frc1[b,id2,r] += X + frc2[bu1,id2,ru1] -= c0*projalg(g1*g2) + frc2[bu2,id1,ru2] += c0*projalg(g2*g1) + frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1) + frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1) + frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2) + frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r]) + frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1) + + else + if TWP + X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(c0*ztw,g1*g2) + frc2[bu2,id1,ru2] += projalg(c0*ztw,g2*g1) + else + X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= c0*projalg(g1*g2) + frc2[bu2,id1,ru2] += c0*projalg(g2*g1) + end + if TWH1 + frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1) + frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1) + else + frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1) + frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1) + end + if TWH2 + X += projalg(ztw*c1,U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + frc2[bu2,id1,ru2] += projalg(ztw*c1,g2*h2/U[bu2,id1,ru2]) + else + X += c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2]) + end + if TWH3 + X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3)) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2) + else + X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) + frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2) + end + if TWH4 + frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r]) + frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1) + else + frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4) + frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r]) + frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1) + end + frc1[b,id1,r] -= X + frc1[b,id2,r] += X + + end + + end + + return nothing +end + + +## +## SF +## +function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,NB,N,M,D} b = Int64(CUDA.threadIdx().x) r = Int64(CUDA.blockIdx().x) @@ -21,8 +328,8 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt @inbounds begin for id1 in N:-1:1 bu1, ru1 = up((b, r), id1, lp) - SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1==N) - + SFBC = (id1==N) + for id2 = 1:id1-1 bu2, ru2 = up((b, r), id2, lp) ipl = ipl + 1 @@ -30,7 +337,7 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt TWP = (I[id1]==1) && (I[id2]==1) TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) ) TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) ) - + # H2 staple (b1, r1) = up((b,r), id1, lp) (b2, r2) = up((b1,r1), id1, lp) @@ -39,14 +346,14 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt else gb = U[b2,id2,r2] end - + (b2, r2) = up((b1,r1), id2, lp) h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2] - + # H3 staple (b1, r1) = up((b,r), id2, lp) (b2, r2) = up((b1,r1), id2, lp) - + (b3, r3) = up((b1,r1), id1, lp) if SFBC && (it == lp.iL[end]) gc = Ubnd[id2] @@ -55,15 +362,15 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt end h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc # END staples - + if SFBC && (it == lp.iL[end]) ga = Ubnd[id2] else ga = U[bu1,id2,ru1] end - + g2 = U[b,id2,r]\U[b,id1,r] - + if (it == lp.iL[end]) && SFBC S += cG*(c0*tr(g2*ga/U[bu2,id1,ru2]) + (3*c1/2)*tr(g2*ga/h3)) elseif (it == 1) && SFBC @@ -85,17 +392,17 @@ function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, zt S += c1*tr(g2*ga/h3) end end - + end end - + plx[I] = S end - + return nothing end -function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D} +function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D} @inbounds begin @@ -103,21 +410,20 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B r = Int64(CUDA.blockIdx().x) I = point_coord((b,r), lp) it = I[N] - IBND = ( ( (B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && - ( (it == 1) || (it == lp.iL[end])) ) - + IBND = ( (it == 1) || (it == lp.iL[end])) + S = zero(eltype(plx)) ipl = 0 for id1 in N:-1:1 bu1, ru1 = up((b, r), id1, lp) - SFBND = IBND && (id1 == N) + SFBND = IBND && (id1 == N) for id2 = 1:id1-1 bu2, ru2 = up((b, r), id2, lp) ipl = ipl + 1 TWP = (I[id1]==1) && (I[id2]==1) - - if SFBND && (it == lp.iL[end]) + + if SFBND && (it == lp.iL[end]) gt1 = Ubnd[id2] else gt1 = U[bu1,id2,ru1] @@ -134,46 +440,46 @@ function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,B end end end - + plx[I] = S end - + return nothing end -function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D} +function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D} b = Int64(CUDA.threadIdx().x) r = Int64(CUDA.blockIdx().x) I = point_coord((b,r), lp) it = I[N] - + @inbounds begin id1, id2 = lp.plidx[ipl] bu1, ru1 = up((b, r), id1, lp) bu2, ru2 = up((b, r), id2, lp) TWP = (I[id1]==1)&&(I[id2]==1) - - SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N) - + + SFBC = (id1 == N) + if SFBC && (it == lp.iL[end]) gt1 = Ubnd[id2] else gt1 = U[bu1,id2,ru1] end - + g1 = gt1/U[bu2,id1,ru2] g2 = U[b,id2,r]\U[b,id1,r] - + if SFBC && (it == 1) X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r]) - + frc1[b ,id1, r ] -= X frc2[bu1,id2,ru1] -= cG*projalg(g1*g2) frc2[bu2,id1,ru2] += cG*projalg(g2*g1) elseif SFBC && (it == lp.iL[end]) X = cG*projalg(U[b,id1,r]*g1/U[b,id2,r]) - + frc1[b ,id1, r ] -= X frc1[b ,id2, r ] += X frc2[bu2,id1,ru2] += cG*projalg(g2*g1) @@ -191,29 +497,29 @@ function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, frc1[b ,id2, r ] += X end end - + return nothing end -function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,B,D}) where {T,N,M,B,D} +function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::Union{SpaceParm{N,M,BC_SF_ORBI,D},SpaceParm{N,M,BC_SF_AFWB,D}}) where {T,N,M,D} b = Int64(CUDA.threadIdx().x) r = Int64(CUDA.blockIdx().x) I = point_coord((b,r), lp) it = I[N] - + @inbounds begin id1, id2 = lp.plidx[ipl] bu1, ru1 = up((b, r), id1, lp) bu2, ru2 = up((b, r), id2, lp) - - SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == N) + + SFBC = (id1 == N) TWP = (I[id1]==1) && (I[id2]==1) TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) ) TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) ) TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) ) TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) ) - + # H1 staple (b1, r1) = dw((b,r), id2, lp) (b2, r2) = up((b1,r1), id1, lp) @@ -223,7 +529,7 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, gc = U[b2,id2,r2] end h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc - + # H2 staple (b1, r1) = up((b,r), id1, lp) (b2, r2) = up((b1,r1), id1, lp) @@ -232,10 +538,10 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, else gb = U[b2,id2,r2] end - + (b2, r2) = up((b1,r1), id2, lp) h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2] - + # H3 staple (b1, r1) = up((b,r), id2, lp) (b2, r2) = up((b1,r1), id2, lp) @@ -246,42 +552,42 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, gc = U[b3,id2,r3] end h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc - + # H4 staple (b1, r1) = dw((b,r), id1, lp) (b2, r2) = up((b1,r1), id2, lp) h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2] # END staples - + if SFBC && (it == lp.iL[end]) ga = Ubnd[id2] else ga = U[bu1,id2,ru1] end - + g1 = ga/U[bu2,id1,ru2] g2 = U[b,id2,r]\U[b,id1,r] - + if SFBC && (it == 1) X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) + c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + - (3*c1*cG/2)*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) - + (3*c1*cG/2)*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) + frc1[b,id1,r] -= X - + frc2[bu1,id2,ru1] -= (cG*c0)*projalg(g1*g2) + (3*c1*cG/2)*projalg((ga/h3)*g2) + (3*c1*cG/2)*projalg((g1/U[b,id2,r])*h1) - + frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) + - c1*projalg(g2*h2/U[bu2,id1,ru2]) + c1*projalg(g2*h2/U[bu2,id1,ru2]) elseif SFBC && (it == lp.iL[end]) X = (cG*c0)*projalg(U[b,id1,r]*g1/U[b,id2,r]) + - (3*c1*cG/2) * (projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))) - - frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4) - frc1[b,id2,r] += X + (3*c1*cG/2)*projalg(h1*g1/U[b,id2,r]) - + (3*c1*cG/2) * (projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3))) + + frc1[b,id1,r] -= X + c1*projalg(U[b,id1,r]*g1/h4) + frc1[b,id2,r] += X + (3*c1*cG/2)*projalg(h1*g1/U[b,id2,r]) + frc2[bu2,id1,ru2] += (cG*c0)*projalg(g2*g1) + (3*c1*cG/2) * projalg((U[b,id2,r]\h1)*g1) + - c1 * projalg(h4\U[b,id1,r]*g1) + c1 * projalg(h4\U[b,id1,r]*g1) else if TWP X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r]) @@ -294,11 +600,11 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, end if TWH1 frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r]) - frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1) frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1) else frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r]) - frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1) + frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1) frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1) end if TWH2 @@ -310,27 +616,274 @@ function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, end if TWH3 X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3)) - frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2) else X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) - frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2) + frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2) end if TWH4 - frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4) + frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4) frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r]) - frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1) + frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1) else - frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4) + frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4) frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r]) - frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1) + frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1) + end + frc1[b,id1,r] -= X + frc1[b,id2,r] += X + + end + + end + + return nothing +end + + + +## +## PERIODIC +## +function krnl_impr!(plx, U::AbstractArray{T}, c0, c1, Ubnd::NTuple{NB,T}, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,NB,N,M,D} + + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + ipl = 0 + S = zero(eltype(plx)) + @inbounds begin + for id1 in N:-1:1 + bu1, ru1 = up((b, r), id1, lp) + + for id2 = 1:id1-1 + bu2, ru2 = up((b, r), id2, lp) + ipl = ipl + 1 + + TWP = (I[id1]==1) && (I[id2]==1) + TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) ) + TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) ) + + # H2 staple + (b1, r1) = up((b,r), id1, lp) + (b2, r2) = up((b1,r1), id1, lp) + gb = U[b2,id2,r2] + + (b2, r2) = up((b1,r1), id2, lp) + h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2] + + # H3 staple + (b1, r1) = up((b,r), id2, lp) + (b2, r2) = up((b1,r1), id2, lp) + + (b3, r3) = up((b1,r1), id1, lp) + + gc = U[b3,id2,r3] + + h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc + # END staples + + ga = U[bu1,id2,ru1] + + g2 = U[b,id2,r]\U[b,id1,r] + + if TWP + S += (ztw[ipl]*c0)*tr(g2*ga/U[bu2,id1,ru2]) + else + S += c0*tr(g2*ga/U[bu2,id1,ru2]) + end + if TWH2 + S += (ztw[ipl]*c1)*tr(g2*h2/U[bu2,id1,ru2]) + else + S += c1*tr(g2*h2/U[bu2,id1,ru2]) + end + if TWH3 + S += (ztw[ipl]*c1)*tr(g2*ga/h3) + else + S += c1*tr(g2*ga/h3) + end + end - frc1[b,id1,r] -= X - frc1[b,id2,r] += X - end + plx[I] = S end + + return nothing +end + +function krnl_plaq!(plx, U::AbstractArray{T}, Ubnd, cG, ztw, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D} + + + @inbounds begin + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + S = zero(eltype(plx)) + ipl = 0 + for id1 in N:-1:1 + bu1, ru1 = up((b, r), id1, lp) + + for id2 = 1:id1-1 + bu2, ru2 = up((b, r), id2, lp) + ipl = ipl + 1 + TWP = (I[id1]==1) && (I[id2]==1) + + gt1 = U[bu1,id2,ru1] + + if TWP + S += ztw[ipl]*tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])) + else + S += tr(U[b,id1,r]*gt1 / (U[b,id2,r]*U[bu2,id1,ru2])) + end + end + end + plx[I] = S + end + + return nothing +end + +function krnl_force_wilson_pln!(frc1, frc2, U::AbstractArray{T}, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D} + + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + @inbounds begin + id1, id2 = lp.plidx[ipl] + bu1, ru1 = up((b, r), id1, lp) + bu2, ru2 = up((b, r), id2, lp) + TWP = (I[id1]==1)&&(I[id2]==1) + + gt1 = U[bu1,id2,ru1] + + g1 = gt1/U[bu2,id1,ru2] + g2 = U[b,id2,r]\U[b,id1,r] + + if TWP + X = projalg(ztw,U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(ztw,g1*g2) + frc2[bu2,id1,ru2] += projalg(ztw,g2*g1) + else + X = projalg(U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(g1*g2) + frc2[bu2,id1,ru2] += projalg(g2*g1) + end + frc1[b ,id1, r ] -= X + frc1[b ,id2, r ] += X + end + + return nothing +end + +function krnl_force_impr_pln!(frc1, frc2, U::AbstractArray{T}, c0, c1, Ubnd, cG, ztw, ipl, lp::SpaceParm{N,M,BC_PERIODIC,D}) where {T,N,M,D} + + b = Int64(CUDA.threadIdx().x) + r = Int64(CUDA.blockIdx().x) + I = point_coord((b,r), lp) + it = I[N] + + @inbounds begin + id1, id2 = lp.plidx[ipl] + bu1, ru1 = up((b, r), id1, lp) + bu2, ru2 = up((b, r), id2, lp) + + TWP = (I[id1]==1) && (I[id2]==1) + TWH1 = TWP || ( (I[id1]==1) && (I[id2]==2) ) + TWH2 = TWP || ( (I[id1]==lp.iL[id1]) && (I[id2]==1) ) + TWH3 = TWP || ( (I[id1]==1) && (I[id2]==lp.iL[id2]) ) + TWH4 = TWP || ( (I[id1]==2) && (I[id2]==1) ) + + # H1 staple + (b1, r1) = dw((b,r), id2, lp) + (b2, r2) = up((b1,r1), id1, lp) + + gc = U[b2,id2,r2] + + h1 = (U[b1,id2,r1]\U[b1,id1,r1])*gc + + # H2 staple + (b1, r1) = up((b,r), id1, lp) + (b2, r2) = up((b1,r1), id1, lp) + + gb = U[b2,id2,r2] + + (b2, r2) = up((b1,r1), id2, lp) + h2 = (U[b1,id1,r1]*gb)/U[b2,id1,r2] + + # H3 staple + (b1, r1) = up((b,r), id2, lp) + (b2, r2) = up((b1,r1), id2, lp) + (b3, r3) = up((b1,r1), id1, lp) + + gc = U[b3,id2,r3] + h3 = (U[b1,id2,r1]*U[b2,id1,r2])/gc + + # H4 staple + (b1, r1) = dw((b,r), id1, lp) + (b2, r2) = up((b1,r1), id2, lp) + h4 = (U[b1,id1,r1]\U[b1,id2,r1])*U[b2,id1,r2] + # END staples + + ga = U[bu1,id2,ru1] + + g1 = ga/U[bu2,id1,ru2] + g2 = U[b,id2,r]\U[b,id1,r] + + if TWP + X = projalg(c0*ztw,U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(c0*ztw,g1*g2) + frc2[bu2,id1,ru2] += projalg(c0*ztw,g2*g1) + else + X = c0*projalg(U[b,id1,r]*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= c0*projalg(g1*g2) + frc2[bu2,id1,ru2] += c0*projalg(g2*g1) + end + if TWH1 + frc1[b,id2,r] += projalg(ztw*c1,h1*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/U[b,id2,r])*h1) + frc2[bu2,id1,ru2] += projalg(ztw*c1,(U[b,id2,r]\h1)*g1) + else + frc1[b,id2,r] += c1*projalg(h1*g1/U[b,id2,r]) + frc2[bu1,id2,ru1] -= c1*projalg((g1/U[b,id2,r])*h1) + frc2[bu2,id1,ru2] += c1*projalg((U[b,id2,r]\h1)*g1) + end + if TWH2 + X += projalg(ztw*c1,U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + frc2[bu2,id1,ru2] += projalg(ztw*c1,g2*h2/U[bu2,id1,ru2]) + else + X += c1*projalg(U[b,id1,r]*h2/(U[b,id2,r]*U[bu2,id1,ru2])) + frc2[bu2,id1,ru2] += c1*projalg(g2*h2/U[bu2,id1,ru2]) + end + if TWH3 + X += projalg(ztw*c1,U[b,id1,r]*ga/(U[b,id2,r]*h3)) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(ga/h3)*g2) + else + X += c1*projalg(U[b,id1,r]*ga/(U[b,id2,r]*h3)) + frc2[bu1,id2,ru1] -= c1*projalg((ga/h3)*g2) + end + if TWH4 + frc1[b,id1,r] -= projalg(ztw*c1,U[b,id1,r]*g1/h4) + frc2[bu1,id2,ru1] -= projalg(ztw*c1,(g1/h4)*U[b,id1,r]) + frc2[bu2,id1,ru2] += projalg(ztw*c1,h4\U[b,id1,r]*g1) + else + frc1[b,id1,r] -= c1*projalg(U[b,id1,r]*g1/h4) + frc2[bu1,id2,ru1] -= c1*projalg((g1/h4)*U[b,id1,r]) + frc2[bu2,id1,ru2] += c1*projalg(h4\U[b,id1,r]*g1) + end + frc1[b,id1,r] -= X + frc1[b,id2,r] += X + + + end + return nothing end @@ -388,4 +941,3 @@ function force_pln!(frc1, ftmp, U, Ubnd, cG, ztw, lp::SpaceParm, c0=1) return nothing end - diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index 370c113..4f841c3 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -134,7 +134,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S r = Int64(CUDA.blockIdx().x) it = point_time((b, r), lp) - SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) + SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) + OBC = (B == BC_OPEN) @inbounds for id in 1:N bu, ru = up((b,r), id, lp) @@ -152,13 +153,21 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + projalg(U[b,id,r]*X/U[b,id,r])) end - else + end + if OBC + if (it > 1) && (it < lp.iL[end]) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + elseif ((it == lp.iL[end]) || (it == 1)) && (id < N) + frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(U[b,id,r]*X/U[b,id,r])) + end + else frc2[b,id,r] = (5/6)*frc[b,id,r] + (1/6)*(projalg(Ud\Y*Ud) + projalg(U[b,id,r]*X/U[b,id,r])) end end end - return nothing end @@ -264,7 +273,8 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws: @timeit "E(t) plaquette measurement" begin ztw = ztwist(gp, lp) - SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) + SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) + OBC = (B == BC_OPEN) tp = ntuple(i->i, N-1) V3 = prod(lp.iL[1:end-1]) @@ -285,6 +295,10 @@ function Eoft_plaq(Eslc, U, gp::GaugeParm{T,G,NN}, lp::SpaceParm{N,M,B,D}, ymws: if !SFBC Eslc[1,ipl] = Etmp[1] + Etmp[end] end + if OBC ## Check normalization of timelike boundary plaquettes + Eslc[end,ipl] = Etmp[end-1] + Eslc[1,ipl] = Etmp[1] + end else for it in 1:lp.iL[end] Eslc[it,ipl] = 2*Etmp[it] @@ -327,7 +341,6 @@ function krnl_plaq_pln!(plx, U::AbstractArray{T}, Ubnd, ztw, ipl, lp::SpaceParm{ plx[I] = tr(U[b,id1,r]*gt / (U[b,id2,r]*U[bu2,id1,ru2])) end end - return nothing end @@ -350,21 +363,18 @@ function Qtop(Qslc, U, gp::GaugeParm, lp::SpaceParm{4,M,B,D}, ymws::YMworkspace) CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp) end - CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 2,4, ztw[2], ztw[4], lp) end CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, +, ymws.frc1, ymws.frc2, lp) end - CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_field_tensor!(ymws.frc1, ymws.frc2, U, gp.Ubnd, 3,6, ztw[3], ztw[6], lp) end CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_qd!(ymws.rm, -, ymws.frc1, ymws.frc2, lp) end - Qslc .= reshape(Array(CUDA.reduce(+, ymws.rm; dims=tp)),lp.iL[end])./(32*pi^2) end @@ -445,7 +455,7 @@ function krnl_add_et!(rm, frc1, lp::SpaceParm{4,M,B,D}) where {M,B,D} I = point_coord((b,r), lp) rm[I] = dot(X1,X1) end - + return nothing end @@ -474,6 +484,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T}, #First plane id1, id2 = lp.plidx[ipl1] SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4) + OBC = ((B == BC_OPEN) && (id1 == 4)) TWP = ((I[id1]==1)&&(I[id2]==1)) bu1, ru1 = up((b, r), id1, lp) @@ -493,6 +504,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T}, frc1[bu1,2,ru1] = zero(TA) frc1[bd,3,rd] = zero(TA) frc1[bu2,4,ru2] = projalg(l2*l1) + elseif OBC && (it == lp.iL[end]) + frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r]) + frc1[bu1,2,ru1] = zero(TA) + frc1[bd,3,rd] = zero(TA) + frc1[bu2,4,ru2] = projalg(l2*l1) else if TWP frc1[b,1,r] = projalg(ztw1, U[b,id1,r]*l1/U[b,id2,r]) @@ -510,6 +526,7 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T}, # Second plane id1, id2 = lp.plidx[ipl2] SFBC = ((B == BC_SF_AFWB) || (B == BC_SF_ORBI) ) && (id1 == 4) + OBC = ((B == BC_OPEN) && (id1 == 4)) TWP = ((I[id1]==1)&&(I[id2]==1)) bu1, ru1 = up((b, r), id1, lp) @@ -529,6 +546,11 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T}, frc2[bu1,2,ru1] = zero(TA) frc2[bd,3,rd] = zero(TA) frc2[bu2,4,ru2] = projalg(l2*l1) + elseif OBC && (it == lp.iL[end]) + frc1[b,1,r] = projalg(U[b,id1,r]*l1/U[b,id2,r]) + frc1[bu1,2,ru1] = zero(TA) + frc1[bd,3,rd] = zero(TA) + frc1[bu2,4,ru2] = projalg(l2*l1) else if TWP frc2[b,1,r] = projalg(ztw2, U[b,id1,r]*l1/U[b,id2,r]) @@ -543,7 +565,5 @@ function krnl_field_tensor!(frc1::AbstractArray{TA}, frc2, U::AbstractArray{T}, end end end - return nothing end -