From f6be70070e5068cbdf6630ed9d20359d628107dd Mon Sep 17 00:00:00 2001 From: Alberto Ramos Date: Mon, 19 Jul 2021 02:03:31 +0200 Subject: [PATCH] Working SU(3) code --- src/YM/YMfields.jl | 112 ++++++++++++++++++++++++++++++++++++++++ src/YM/YMhmc.jl | 125 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 237 insertions(+) create mode 100644 src/YM/YMfields.jl create mode 100644 src/YM/YMhmc.jl diff --git a/src/YM/YMfields.jl b/src/YM/YMfields.jl new file mode 100644 index 0000000..d4fc853 --- /dev/null +++ b/src/YM/YMfields.jl @@ -0,0 +1,112 @@ +### +### "THE BEER-WARE LICENSE": +### Alberto Ramos wrote this file. As long as you retain this +### notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy me a beer in +### return. +### +### file: YMfields.jl +### created: Thu Jul 15 15:16:47 2021 +### + + +function field(::Type{T}, lp::SpaceParm) where {T <: Union{Group, Algebra}} + + sz = lp.iL..., lp.ndim + if (T == SU2) + As = StructArray{SU2}((ones(ComplexF64, sz), zeros(ComplexF64, sz))) + elseif (T == SU2alg) + As = StructArray{SU2alg}((zeros(Float64, sz), + zeros(Float64, sz), + zeros(Float64, sz))) + elseif (T == SU3) + As = StructArray{SU3}((ones(ComplexF64, sz), zeros(ComplexF64, sz), zeros(ComplexF64, sz), zeros(ComplexF64, sz), ones(ComplexF64, sz), zeros(ComplexF64, sz))) + elseif (T == SU3alg) + As = StructArray{SU3alg}((zeros(Float64, sz), + zeros(Float64, sz), + zeros(Float64, sz), + zeros(Float64, sz), + zeros(Float64, sz), + zeros(Float64, sz), + zeros(Float64, sz), + zeros(Float64, sz))) + end + + return replace_storage(CuArray, As) +end + +function randomn!(X) + + if (eltype(X) == SU2alg) + randn!(CURAND.default_rng(), LazyRows(X).t1) + randn!(CURAND.default_rng(), LazyRows(X).t2) + randn!(CURAND.default_rng(), LazyRows(X).t3) + elseif (eltype(X) == SU3alg) + randn!(CURAND.default_rng(), LazyRows(X).t1) + randn!(CURAND.default_rng(), LazyRows(X).t2) + randn!(CURAND.default_rng(), LazyRows(X).t3) + randn!(CURAND.default_rng(), LazyRows(X).t4) + randn!(CURAND.default_rng(), LazyRows(X).t5) + randn!(CURAND.default_rng(), LazyRows(X).t6) + randn!(CURAND.default_rng(), LazyRows(X).t7) + randn!(CURAND.default_rng(), LazyRows(X).t8) + end + return nothing +end + +function zero!(X) + + if (eltype(X) == SU2alg) + fill!(LazyRows(X).t1, 0.0) + fill!(LazyRows(X).t2, 0.0) + fill!(LazyRows(X).t3, 0.0) + end + + if (eltype(X) == SU3alg) + fill!(LazyRows(X).t1, 0.0) + fill!(LazyRows(X).t2, 0.0) + fill!(LazyRows(X).t3, 0.0) + fill!(LazyRows(X).t4, 0.0) + fill!(LazyRows(X).t5, 0.0) + fill!(LazyRows(X).t6, 0.0) + fill!(LazyRows(X).t7, 0.0) + fill!(LazyRows(X).t8, 0.0) + end + + if (eltype(X) == SU2) + fill!(LazyRows(X).t1, complex(1.0)) + fill!(LazyRows(X).t2, complex(0.0)) + end + + if (eltype(X) == SU3) + fill!(LazyRows(X).u11, complex(1.0)) + fill!(LazyRows(X).u12, complex(0.0)) + fill!(LazyRows(X).u13, complex(0.0)) + fill!(LazyRows(X).u21, complex(0.0)) + fill!(LazyRows(X).u22, complex(1.0)) + fill!(LazyRows(X).u23, complex(0.0)) + end + + return nothing +end + +function norm2(X) + + d = 0.0 + if (eltype(X) == SU2alg) + d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t3) + elseif (eltype(X) == SU2alg) + d = CUDA.mapreduce(x->x^2, +, LazyRows(X).t1) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t2) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t3) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t4) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t5) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t6) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t7) + + CUDA.mapreduce(x->x^2, +, LazyRows(X).t8) + end + + return d +end diff --git a/src/YM/YMhmc.jl b/src/YM/YMhmc.jl new file mode 100644 index 0000000..852f6e4 --- /dev/null +++ b/src/YM/YMhmc.jl @@ -0,0 +1,125 @@ +### +### "THE BEER-WARE LICENSE": +### Alberto Ramos wrote this file. As long as you retain this +### notice you can do whatever you want with this stuff. If we meet some +### day, and you think this stuff is worth it, you can buy me a beer in +### return. +### +### file: YMhmc.jl +### created: Thu Jul 15 11:27:28 2021 +### + +function gauge_action(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace) + + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_plaq!(ymws.cm, U, lp) + end + S = gp.beta*( prod(lp.iL)*lp.npls - + CUDA.mapreduce(real, +, real.(ymws.cm))/gp.ng ) + + return S +end + +function plaquette(U, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace) + + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_plaq!(ymws.cm, U, lp) + end + + return CUDA.mapreduce(real, +, real.(ymws.cm))/(prod(lp.iL)*lp.npls) +end + +hamiltonian(mom, U, lp, gp, kp, ymws) = norm2(mom)/2.0 + + gauge_action(U, lp, gp, kp, ymws) + +function HMC!(U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace; noacc=false) + + ymws.U1 .= U + + randomn!(ymws.mom) + hini = hamiltonian(ymws.mom, U, lp, gp, kp, ymws) + + OMF4!(ymws.mom, U, eps, ns, lp, gp, kp, ymws) + + dh = hamiltonian(ymws.mom, U, lp, gp, kp, ymws) - hini + pacc = exp(-dh) + + acc = true + if (noacc) + return dh, acc + end + + if (pacc < 1.0) + r = rand() + if (pacc < r) + U .= ymws.U1 + acc = false + end + end + + return dh, acc +end + +function krnl_updt!(mom, frc1, frc2, eps1, U, eps2, lp::SpaceParm) + + X = map2latt((CUDA.threadIdx().x,CUDA.threadIdx().y,CUDA.threadIdx().z), + (CUDA.blockIdx().x,CUDA.blockIdx().y,CUDA.blockIdx().z)) + + for id in 1:lp.ndim + mom[X,id] = mom[X,id] + eps1 * (frc1[X,id]+frc2[X,id]) + U[X,id] = expm(U[X,id], mom[X,id], eps2) + end + + return nothing +end + +function OMF4!(mom, U, eps, ns, lp::SpaceParm, gp::GaugeParm, kp::KernelParm, ymws::YMworkspace) + + r1::Float64 = 0.08398315262876693 + r2::Float64 = 0.2539785108410595 + r3::Float64 = 0.6822365335719091 + r4::Float64 = -0.03230286765269967 + r5::Float64 = 0.5-r1-r3 + r6::Float64 = 1.0-2.0*(r2+r4) + + ee = eps*gp.beta/gp.ng + force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) + for i in 1:ns + # STEP 1 + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, eps*r2, lp) + end + + # STEP 2 + force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r4, lp) + end + + # STEP 3 + force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r6, lp) + end + + # STEP 4 + force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r5*ee, U, eps*r4, lp) + end + + # STEP 5 + force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r3*ee, U, eps*r2, lp) + end + + # STEP 6 + force0_wilson!(ymws.frc1,ymws.frc2, U, lp, gp, kp) + CUDA.@sync begin + CUDA.@cuda threads=kp.threads blocks=kp.blocks krnl_updt!(ymws.mom, ymws.frc1,ymws.frc2, r1*ee, U, 0.0, lp) + end + end + + return nothing +end