diff --git a/src/Groups/GroupSU3.jl b/src/Groups/GroupSU3.jl index d049206..85a1c96 100644 --- a/src/Groups/GroupSU3.jl +++ b/src/Groups/GroupSU3.jl @@ -111,7 +111,7 @@ function projalg(a::SU3{T}) where T <: AbstractFloat sr3ov2::T = 0.866025403784438646763723170752 - ditr = ( imag(a.u11) + imag(a.u22) - 2.0*imag(a.u11*a.u22 - a.u12*a.u21) )/3.0 + ditr = ( imag(a.u11) + imag(a.u22) + 2.0*imag(a.u11*a.u22 - a.u12*a.u21) )/3.0 m12 = (a.u12 - conj(a.u21))/2.0 m13 = (a.u13 - (a.u12*a.u23 - a.u13*a.u22) )/2.0 m23 = (a.u23 - (a.u13*a.u21 - a.u11*a.u23) )/2.0 @@ -122,21 +122,6 @@ function projalg(a::SU3{T}) where T <: AbstractFloat sr3ov2*(ditr)) end -function projalg(a::M3x3{T}) where T <: AbstractFloat - - sr3ov2::T = 0.866025403784438646763723170752 - - ditr = ( imag(a.u11) + imag(a.u22) + 2.0*imag(a.u33) )/3.0 - m12 = (a.u12 - conj(a.u21))/2.0 - m13 = (a.u13 - conj(a.u31))/2.0 - m23 = (a.u23 - conj(a.u32))/2.0 - - return SU3alg{T}(imag( m12 ), imag( m13 ), imag( m23 ), - real( m12 ), real( m13 ), real( m23 ), - (imag(a.u11)-imag(a.u22))/2.0, - sr3ov2*(ditr)) -end - dot(a::SU3alg{T},b::SU3alg{T}) where T <: AbstractFloat = a.t1*b.t1 + a.t2*b.t2 + a.t3*b.t3 + a.t4*b.t4 + a.t5*b.t5 + a.t6*b.t6 + a.t7*b.t7 + a.t8*b.t8 norm2(a::SU3alg{T}) where T <: AbstractFloat = a.t1^2 + a.t2^2 + a.t3^2 + a.t4^2 + a.t5^2 + a.t6^2 + a.t7^2 + a.t8^2 norm(a::SU3alg{T}) where T <: AbstractFloat = sqrt(a.t1^2 + a.t2^2 + a.t3^2 + a.t4^2 + a.t5^2 + a.t6^2 + a.t7^2 + a.t8^2) @@ -270,7 +255,20 @@ Base.:-(b::M3x3{T}) where T <: AbstractFloat = M3x3{T}(-b.u11, -b.u12 Base.:+(b::M3x3{T}) where T <: AbstractFloat = M3x3{T}(b.u11, b.u12, bu13, b.u21, b.u22, bu23, b.u31, b.u32, bu33) +function projalg(a::M3x3{T}) where T <: AbstractFloat + sr3ov2::T = 0.866025403784438646763723170752 + + ditr = ( imag(a.u11) + imag(a.u22) + 2.0*imag(a.u33) )/3.0 + m12 = (a.u12 - conj(a.u21))/2.0 + m13 = (a.u13 - conj(a.u31))/2.0 + m23 = (a.u23 - conj(a.u32))/2.0 + + return SU3alg{T}(imag( m12 ), imag( m13 ), imag( m23 
), + real( m12 ), real( m13 ), real( m23 ), + (imag(a.u11)-imag(a.u22))/2.0, + sr3ov2*(ditr)) +end function alg2mat(a::SU3alg{T}) where T <: AbstractFloat @@ -295,7 +293,7 @@ end Base.:*(a::SU3alg,b::SU3) = alg2mat(a)*b Base.:*(a::SU3,b::SU3alg) = a*alg2mat(b) Base.:/(a::SU3alg,b::SU3) = alg2mat(a)/b -Base.\:(a::SU3,b::SU3alg) = a\alg2mat(b) +Base.:\(a::SU3,b::SU3alg) = a\alg2mat(b) @inline function exp_iter(dch::Complex{T}, tch::T) where T <: AbstractFloat diff --git a/src/Space/Space.jl b/src/Space/Space.jl index 71bc94b..f029f8b 100644 --- a/src/Space/Space.jl +++ b/src/Space/Space.jl @@ -74,7 +74,12 @@ function Base.show(io::IO, lp::SpaceParm) return end +""" + function up(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm) +Given a point `x` with index `p`, this routine returns the index of the point +`x + a id`. +""" @inline function up(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm) ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id]) @@ -96,6 +101,12 @@ end return b, r end +""" + function dw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm) + +Given a point `x` with index `p`, this routine returns the index of the point +`x - a id`. +""" @inline function dw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm) ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id]) @@ -116,6 +127,12 @@ end return b, r end +""" + function updw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm) + +Given a point `x` with index `p`, this routine returns the index of the points +`x + a id` and `x - a id`. 
+""" @inline function updw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm) ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id]) @@ -152,7 +169,7 @@ end return bu, ru, bd, rd end -@inline function global_point(p::NTuple{2,Int64}, lp::SpaceParm) +@inline function point_coord(p::NTuple{2,Int64}, lp::SpaceParm) @inline cntb(nb, id::Int64, lp::SpaceParm) = mod(div(nb-1,lp.blkS[id]),lp.blk[id]) @inline cntr(nr, id::Int64, lp::SpaceParm) = mod(div(nr-1,lp.rbkS[id]),lp.rbk[id]) @@ -163,11 +180,22 @@ end return pt end -@inline function point_idx(pt::NTuple{4, Int64}, lp::SpaceParm) +@inline function point_time(p::NTuple{2,Int64}, lp::SpaceParm) + + @inline cntb(nb, id::Int64, lp::SpaceParm) = mod(div(nb-1,lp.blkS[id]),lp.blk[id]) + @inline cntr(nr, id::Int64, lp::SpaceParm) = mod(div(nr-1,lp.rbkS[id]),lp.rbk[id]) + + @inline cnt(nb, nr, id::Int64, lp::SpaceParm) = 1 + cntb(nb,id,lp) + cntr(nr,id,lp)*lp.blk[id] + + return cnt(p[1], p[2], 1, lp) +end + + +@inline function point_index(pt::NTuple{4, Int64}, lp::SpaceParm) end -export up, dw, updw, global_point, point_idx +export up, dw, updw, global_point, point_index, point_coord, point_time end diff --git a/src/YM/YMflow.jl b/src/YM/YMflow.jl index c848e7a..7c84163 100644 --- a/src/YM/YMflow.jl +++ b/src/YM/YMflow.jl @@ -20,7 +20,7 @@ function add_zth_term(ymws::YMworkspace, U, lp) CUDA.@sync begin CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_zth!(ymws.frc1,ymws.frc2,U,lp) end - ymws.frc1 .= (5/6).*ymws.frc1 .+ ymws.frc2 + ymws.frc1 .= ymws.frc2 return nothing end @@ -30,14 +30,15 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S b, r = CUDA.threadIdx().x, CUDA.blockIdx().x Ush = @cuStaticSharedMem(TG, D) - Ush = @cuStaticSharedMem(TA, D) + Fsh = @cuStaticSharedMem(TA, D) @inbounds for id in 1:N Ush[b] = U[b,id,r] - Fsh[b] = Frc[b,id,r] + Fsh[b] = frc[b,id,r] sync_threads() - bu, ru, bd, rd = updw((b,r), id, lp) + bu, ru = up((b,r), id, lp) + bd, rd = dw((b,r), id, lp) if ru == r X = Fsh[bu] @@ 
-52,8 +53,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S Ud = U[bd,id,rd] end - frc2[b,id,r] = (1/6)*(projalg(Ud[b]\Y*Ud[b]) + - projalg(Ush[b]*X/Ush[b])) + frc2[b,id,r] = (5/6)*Fsh[b] + (1/6)*(projalg(Ud\Y*Ud) + + projalg(Ush[b]*X/Ush[b])) end return nothing @@ -76,29 +77,29 @@ end function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false) for i in 1:ns - c0 = eps/2 + e0 = eps/2 force_gauge(ymws, U, c0, lp) if add_zth add_zth_term(ymws::YMworkspace, U, lp) end ymws.mom .= ymws.frc1 - U .= expm.(U, ymws.mom, c0) + U .= expm.(U, ymws.mom, e0) - c0 = -34*eps/36 - c1 = 16*eps/9 + e0 = -34*eps/36 + e1 = 16*eps/9 force_gauge(ymws, U, c0, lp) if add_zth add_zth_term(ymws::YMworkspace, U, lp) end - ymws.mom .= c0.*ymws.mom .+ c1.*ymws.frc1 + ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1 U .= expm.(U, ymws.mom) - c1 = 6*eps/4 + e1 = 6*eps/4 force_gauge(ymws, U, c0, lp) if add_zth add_zth_term(ymws::YMworkspace, U, lp) end - ymws.mom .= c1.*ymws.frc1 .- ymws.mom + ymws.mom .= e1.*ymws.frc1 .- ymws.mom U .= expm.(U, ymws.mom) end diff --git a/src/main/times.jl b/src/main/times.jl index 5b81e38..757ea29 100644 --- a/src/main/times.jl +++ b/src/main/times.jl @@ -7,7 +7,7 @@ Pkg.activate("/lhome/ific/a/alramos/s.images/julia/workspace/LatticeGPU") using LatticeGPU # Set lattice/block size -lp = SpaceParm{4}((96,96,32,32), (4,4,4,4)) +lp = SpaceParm{4}((32,32,32,32), (4,4,4,4)) println("Space Parameters: ", lp) # Seed RNG