mirror of
https://igit.ific.uv.es/alramos/latticegpu.jl.git
synced 2025-05-14 19:23:42 +02:00
Bugs corrected, Zeuthen flow working
This commit is contained in:
parent
334ddf813a
commit
9de5e4ad59
4 changed files with 61 additions and 34 deletions
|
@ -111,7 +111,7 @@ function projalg(a::SU3{T}) where T <: AbstractFloat
|
||||||
|
|
||||||
sr3ov2::T = 0.866025403784438646763723170752
|
sr3ov2::T = 0.866025403784438646763723170752
|
||||||
|
|
||||||
ditr = ( imag(a.u11) + imag(a.u22) - 2.0*imag(a.u11*a.u22 - a.u12*a.u21) )/3.0
|
ditr = ( imag(a.u11) + imag(a.u22) + 2.0*imag(a.u11*a.u22 - a.u12*a.u21) )/3.0
|
||||||
m12 = (a.u12 - conj(a.u21))/2.0
|
m12 = (a.u12 - conj(a.u21))/2.0
|
||||||
m13 = (a.u13 - (a.u12*a.u23 - a.u13*a.u22) )/2.0
|
m13 = (a.u13 - (a.u12*a.u23 - a.u13*a.u22) )/2.0
|
||||||
m23 = (a.u23 - (a.u13*a.u21 - a.u11*a.u23) )/2.0
|
m23 = (a.u23 - (a.u13*a.u21 - a.u11*a.u23) )/2.0
|
||||||
|
@ -122,21 +122,6 @@ function projalg(a::SU3{T}) where T <: AbstractFloat
|
||||||
sr3ov2*(ditr))
|
sr3ov2*(ditr))
|
||||||
end
|
end
|
||||||
|
|
||||||
function projalg(a::M3x3{T}) where T <: AbstractFloat
|
|
||||||
|
|
||||||
sr3ov2::T = 0.866025403784438646763723170752
|
|
||||||
|
|
||||||
ditr = ( imag(a.u11) + imag(a.u22) + 2.0*imag(a.u33) )/3.0
|
|
||||||
m12 = (a.u12 - conj(a.u21))/2.0
|
|
||||||
m13 = (a.u13 - conj(a.u31))/2.0
|
|
||||||
m23 = (a.u23 - conj(a.u32))/2.0
|
|
||||||
|
|
||||||
return SU3alg{T}(imag( m12 ), imag( m13 ), imag( m23 ),
|
|
||||||
real( m12 ), real( m13 ), real( m23 ),
|
|
||||||
(imag(a.u11)-imag(a.u22))/2.0,
|
|
||||||
sr3ov2*(ditr))
|
|
||||||
end
|
|
||||||
|
|
||||||
dot(a::SU3alg{T},b::SU3alg{T}) where T <: AbstractFloat = a.t1*b.t1 + a.t2*b.t2 + a.t3*b.t3 + a.t4*b.t4 + a.t5*b.t5 + a.t6*b.t6 + a.t7*b.t7 + a.t8*b.t8
|
dot(a::SU3alg{T},b::SU3alg{T}) where T <: AbstractFloat = a.t1*b.t1 + a.t2*b.t2 + a.t3*b.t3 + a.t4*b.t4 + a.t5*b.t5 + a.t6*b.t6 + a.t7*b.t7 + a.t8*b.t8
|
||||||
norm2(a::SU3alg{T}) where T <: AbstractFloat = a.t1^2 + a.t2^2 + a.t3^2 + a.t4^2 + a.t5^2 + a.t6^2 + a.t7^2 + a.t8^2
|
norm2(a::SU3alg{T}) where T <: AbstractFloat = a.t1^2 + a.t2^2 + a.t3^2 + a.t4^2 + a.t5^2 + a.t6^2 + a.t7^2 + a.t8^2
|
||||||
norm(a::SU3alg{T}) where T <: AbstractFloat = sqrt(a.t1^2 + a.t2^2 + a.t3^2 + a.t4^2 + a.t5^2 + a.t6^2 + a.t7^2 + a.t8^2)
|
norm(a::SU3alg{T}) where T <: AbstractFloat = sqrt(a.t1^2 + a.t2^2 + a.t3^2 + a.t4^2 + a.t5^2 + a.t6^2 + a.t7^2 + a.t8^2)
|
||||||
|
@ -270,7 +255,20 @@ Base.:-(b::M3x3{T}) where T <: AbstractFloat = M3x3{T}(-b.u11, -b.u12
|
||||||
Base.:+(b::M3x3{T}) where T <: AbstractFloat = M3x3{T}(b.u11, b.u12, bu13,
|
Base.:+(b::M3x3{T}) where T <: AbstractFloat = M3x3{T}(b.u11, b.u12, bu13,
|
||||||
b.u21, b.u22, bu23,
|
b.u21, b.u22, bu23,
|
||||||
b.u31, b.u32, bu33)
|
b.u31, b.u32, bu33)
|
||||||
|
function projalg(a::M3x3{T}) where T <: AbstractFloat
|
||||||
|
|
||||||
|
sr3ov2::T = 0.866025403784438646763723170752
|
||||||
|
|
||||||
|
ditr = ( imag(a.u11) + imag(a.u22) + 2.0*imag(a.u33) )/3.0
|
||||||
|
m12 = (a.u12 - conj(a.u21))/2.0
|
||||||
|
m13 = (a.u13 - conj(a.u31))/2.0
|
||||||
|
m23 = (a.u23 - conj(a.u32))/2.0
|
||||||
|
|
||||||
|
return SU3alg{T}(imag( m12 ), imag( m13 ), imag( m23 ),
|
||||||
|
real( m12 ), real( m13 ), real( m23 ),
|
||||||
|
(imag(a.u11)-imag(a.u22))/2.0,
|
||||||
|
sr3ov2*(ditr))
|
||||||
|
end
|
||||||
|
|
||||||
function alg2mat(a::SU3alg{T}) where T <: AbstractFloat
|
function alg2mat(a::SU3alg{T}) where T <: AbstractFloat
|
||||||
|
|
||||||
|
@ -295,7 +293,7 @@ end
|
||||||
Base.:*(a::SU3alg,b::SU3) = alg2mat(a)*b
|
Base.:*(a::SU3alg,b::SU3) = alg2mat(a)*b
|
||||||
Base.:*(a::SU3,b::SU3alg) = a*alg2mat(b)
|
Base.:*(a::SU3,b::SU3alg) = a*alg2mat(b)
|
||||||
Base.:/(a::SU3alg,b::SU3) = alg2mat(a)/b
|
Base.:/(a::SU3alg,b::SU3) = alg2mat(a)/b
|
||||||
Base.\:(a::SU3,b::SU3alg) = a\alg2mat(b)
|
Base.:\(a::SU3,b::SU3alg) = a\alg2mat(b)
|
||||||
|
|
||||||
@inline function exp_iter(dch::Complex{T}, tch::T) where T <: AbstractFloat
|
@inline function exp_iter(dch::Complex{T}, tch::T) where T <: AbstractFloat
|
||||||
|
|
||||||
|
|
|
@ -74,7 +74,12 @@ function Base.show(io::IO, lp::SpaceParm)
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
function up(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
||||||
|
|
||||||
|
Given a point `x` with index `p`, this routine returns the index of the point
|
||||||
|
`x + a id`.
|
||||||
|
"""
|
||||||
@inline function up(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
@inline function up(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
||||||
|
|
||||||
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
|
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
|
||||||
|
@ -96,6 +101,12 @@ end
|
||||||
return b, r
|
return b, r
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
function dw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
||||||
|
|
||||||
|
Given a point `x` with index `p`, this routine returns the index of the point
|
||||||
|
`x - a id`.
|
||||||
|
"""
|
||||||
@inline function dw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
@inline function dw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
||||||
|
|
||||||
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
|
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
|
||||||
|
@ -116,6 +127,12 @@ end
|
||||||
return b, r
|
return b, r
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
function updw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
||||||
|
|
||||||
|
Given a point `x` with index `p`, this routine returns the index of the points
|
||||||
|
`x + a id` and `x - a id`.
|
||||||
|
"""
|
||||||
@inline function updw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
@inline function updw(p::NTuple{2,Int64}, id::Int64, lp::SpaceParm)
|
||||||
|
|
||||||
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
|
ic = mod(div(p[1]-1,lp.blkS[id]),lp.blk[id])
|
||||||
|
@ -152,7 +169,7 @@ end
|
||||||
return bu, ru, bd, rd
|
return bu, ru, bd, rd
|
||||||
end
|
end
|
||||||
|
|
||||||
@inline function global_point(p::NTuple{2,Int64}, lp::SpaceParm)
|
@inline function point_coord(p::NTuple{2,Int64}, lp::SpaceParm)
|
||||||
|
|
||||||
@inline cntb(nb, id::Int64, lp::SpaceParm) = mod(div(nb-1,lp.blkS[id]),lp.blk[id])
|
@inline cntb(nb, id::Int64, lp::SpaceParm) = mod(div(nb-1,lp.blkS[id]),lp.blk[id])
|
||||||
@inline cntr(nr, id::Int64, lp::SpaceParm) = mod(div(nr-1,lp.rbkS[id]),lp.rbk[id])
|
@inline cntr(nr, id::Int64, lp::SpaceParm) = mod(div(nr-1,lp.rbkS[id]),lp.rbk[id])
|
||||||
|
@ -163,11 +180,22 @@ end
|
||||||
return pt
|
return pt
|
||||||
end
|
end
|
||||||
|
|
||||||
@inline function point_idx(pt::NTuple{4, Int64}, lp::SpaceParm)
|
@inline function point_time(p::NTuple{2,Int64}, lp::SpaceParm)
|
||||||
|
|
||||||
|
@inline cntb(nb, id::Int64, lp::SpaceParm) = mod(div(nb-1,lp.blkS[id]),lp.blk[id])
|
||||||
|
@inline cntr(nr, id::Int64, lp::SpaceParm) = mod(div(nr-1,lp.rbkS[id]),lp.rbk[id])
|
||||||
|
|
||||||
|
@inline cnt(nb, nr, id::Int64, lp::SpaceParm) = 1 + cntb(nb,id,lp) + cntr(nr,id,lp)*lp.blk[id]
|
||||||
|
|
||||||
|
return cnt(p[1], p[2], 1, lp)
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
@inline function point_index(pt::NTuple{4, Int64}, lp::SpaceParm)
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
export up, dw, updw, global_point, point_idx
|
export up, dw, updw, global_point, point_index, point_coord, point_time
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -20,7 +20,7 @@ function add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
CUDA.@sync begin
|
CUDA.@sync begin
|
||||||
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_zth!(ymws.frc1,ymws.frc2,U,lp)
|
CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_add_zth!(ymws.frc1,ymws.frc2,U,lp)
|
||||||
end
|
end
|
||||||
ymws.frc1 .= (5/6).*ymws.frc1 .+ ymws.frc2
|
ymws.frc1 .= ymws.frc2
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
@ -30,14 +30,15 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
||||||
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
|
b, r = CUDA.threadIdx().x, CUDA.blockIdx().x
|
||||||
|
|
||||||
Ush = @cuStaticSharedMem(TG, D)
|
Ush = @cuStaticSharedMem(TG, D)
|
||||||
Ush = @cuStaticSharedMem(TA, D)
|
Fsh = @cuStaticSharedMem(TA, D)
|
||||||
|
|
||||||
@inbounds for id in 1:N
|
@inbounds for id in 1:N
|
||||||
Ush[b] = U[b,id,r]
|
Ush[b] = U[b,id,r]
|
||||||
Fsh[b] = Frc[b,id,r]
|
Fsh[b] = frc[b,id,r]
|
||||||
sync_threads()
|
sync_threads()
|
||||||
|
|
||||||
bu, ru, bd, rd = updw((b,r), id, lp)
|
bu, ru = up((b,r), id, lp)
|
||||||
|
bd, rd = dw((b,r), id, lp)
|
||||||
|
|
||||||
if ru == r
|
if ru == r
|
||||||
X = Fsh[bu]
|
X = Fsh[bu]
|
||||||
|
@ -52,8 +53,8 @@ function krnl_add_zth!(frc, frc2::AbstractArray{TA}, U::AbstractArray{TG}, lp::S
|
||||||
Ud = U[bd,id,rd]
|
Ud = U[bd,id,rd]
|
||||||
end
|
end
|
||||||
|
|
||||||
frc2[b,id,r] = (1/6)*(projalg(Ud[b]\Y*Ud[b]) +
|
frc2[b,id,r] = (5/6)*Fsh[b] + (1/6)*(projalg(Ud\Y*Ud) +
|
||||||
projalg(Ush[b]*X/Ush[b]))
|
projalg(Ush[b]*X/Ush[b]))
|
||||||
end
|
end
|
||||||
|
|
||||||
return nothing
|
return nothing
|
||||||
|
@ -76,29 +77,29 @@ end
|
||||||
function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
|
function flw_rk3(U, ns, eps, c0, lp::SpaceParm, ymws::YMworkspace; add_zth=false)
|
||||||
|
|
||||||
for i in 1:ns
|
for i in 1:ns
|
||||||
c0 = eps/2
|
e0 = eps/2
|
||||||
force_gauge(ymws, U, c0, lp)
|
force_gauge(ymws, U, c0, lp)
|
||||||
if add_zth
|
if add_zth
|
||||||
add_zth_term(ymws::YMworkspace, U, lp)
|
add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
end
|
end
|
||||||
ymws.mom .= ymws.frc1
|
ymws.mom .= ymws.frc1
|
||||||
U .= expm.(U, ymws.mom, c0)
|
U .= expm.(U, ymws.mom, e0)
|
||||||
|
|
||||||
c0 = -34*eps/36
|
e0 = -34*eps/36
|
||||||
c1 = 16*eps/9
|
e1 = 16*eps/9
|
||||||
force_gauge(ymws, U, c0, lp)
|
force_gauge(ymws, U, c0, lp)
|
||||||
if add_zth
|
if add_zth
|
||||||
add_zth_term(ymws::YMworkspace, U, lp)
|
add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
end
|
end
|
||||||
ymws.mom .= c0.*ymws.mom .+ c1.*ymws.frc1
|
ymws.mom .= e0.*ymws.mom .+ e1.*ymws.frc1
|
||||||
U .= expm.(U, ymws.mom)
|
U .= expm.(U, ymws.mom)
|
||||||
|
|
||||||
c1 = 6*eps/4
|
e1 = 6*eps/4
|
||||||
force_gauge(ymws, U, c0, lp)
|
force_gauge(ymws, U, c0, lp)
|
||||||
if add_zth
|
if add_zth
|
||||||
add_zth_term(ymws::YMworkspace, U, lp)
|
add_zth_term(ymws::YMworkspace, U, lp)
|
||||||
end
|
end
|
||||||
ymws.mom .= c1.*ymws.frc1 .- ymws.mom
|
ymws.mom .= e1.*ymws.frc1 .- ymws.mom
|
||||||
U .= expm.(U, ymws.mom)
|
U .= expm.(U, ymws.mom)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ Pkg.activate("/lhome/ific/a/alramos/s.images/julia/workspace/LatticeGPU")
|
||||||
using LatticeGPU
|
using LatticeGPU
|
||||||
|
|
||||||
# Set lattice/block size
|
# Set lattice/block size
|
||||||
lp = SpaceParm{4}((96,96,32,32), (4,4,4,4))
|
lp = SpaceParm{4}((32,32,32,32), (4,4,4,4))
|
||||||
println("Space Parameters: ", lp)
|
println("Space Parameters: ", lp)
|
||||||
|
|
||||||
# Seed RNG
|
# Seed RNG
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue