### ### "THE BEER-WARE LICENSE": ### Alberto Ramos wrote this file. As long as you retain this ### notice you can do whatever you want with this stuff. If we meet some ### day, and you think this stuff is worth it, you can buy me a beer in ### return. <> ### ### file: YMact.jl ### created: Mon Jul 12 18:31:19 2021 ### """ computes global observables by calling krnl_obs! and summing for all lattice points """ function scalar_obs(U, Phi, sp::ScalarParm, lp::SpaceParm, ymws::YMworkspace) @timeit "Scalar observables" begin CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_obs!(ymws.rm, ymws.cm, U, Phi, sp, lp) end V = prod(lp.iL) #summation of global observables rho2 = CUDA.mapreduce(norm2, +, Phi)/V lphi = CUDA.reduce(+, ymws.rm)/(lp.ndim*V) lalpha = CUDA.mapreduce(real, +, ymws.cm)/(lp.ndim*V) end return rho2, lphi, lalpha end """ CUDA function to compute the observables defined in the Obs struct for each lattice point """ function krnl_obs!(rm, cm, U::AbstractArray{TG}, Phi::AbstractArray{TS}, sp::ScalarParm{NP,T}, lp::SpaceParm{N,M,D}) where {TG,TS,NP,T,N,M,D} #thread/block coordinate b, r = CUDA.threadIdx().x, CUDA.blockIdx().x Ush = @cuStaticSharedMem(TG, (D,N)) Psh = @cuStaticSharedMem(TS, (D,NP)) for id in 1:N Ush[b,id] = U[b,id,r] end for i in 1:NP Psh[b,i] = Phi[b,i,r] end sync_threads() IX = point_coord((b,r), lp) rm[IX] = zero(eltype(rm)) cm[IX] = zero(eltype(cm)) #compute obs for i in 1:NP psq = norm( Psh[b,i] ) for id in 1:N bu, ru = up((b, r), id, lp) #thread/block coordinate of up point if (ru == r) phiup = Psh[bu,i] else phiup = Phi[bu,i,ru] end psqup = norm(phiup) rm[IX] += dot( Psh[b,i], Ush[b,id]*phiup ) cm[IX] += complex(rm[IX])/(psq*psqup) end end return nothing end