From dea04bccff75d0c8fee74fb5013c16914cafdb29 Mon Sep 17 00:00:00 2001 From: "Fernando P. Panadero" Date: Mon, 20 Nov 2023 14:39:52 +0100 Subject: [PATCH] CG! scalar product and typos fixed --- src/Solvers/CG.jl | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/src/Solvers/CG.jl b/src/Solvers/CG.jl index 4d58916..8e86c7b 100644 --- a/src/Solvers/CG.jl +++ b/src/Solvers/CG.jl @@ -14,21 +14,43 @@ Solves the linear equation `Ax = si` """ -function CG!(si, U, m0, A, lp::SpaceParm, dws::DiracWorkspace) +function krnl_dot!(sum,fone,ftwo) + b=Int64(CUDA.threadIdx().x) + r=Int64(CUDA.blockIdx().x) + + sum[b,r] = dot(fone[b,r],ftwo[b,r]) + +return nothing +end + +function field_dot(fone::AbstractArray,ftwo::AbstractArray,sumf,lp) where {T} + + CUDA.@sync begin + CUDA.@cuda threads=lp.bsz blocks=lp.rsz krnl_dot!(sumf,fone,ftwo) + end + + return sum(sumf) +end + +function CG!(si, U, A, dpar::DiracParam, lp::SpaceParm, dws::DiracWorkspace{T}, maxiter::Int64 = 10, tol=1.0) where {T} dws.sr .= si dws.sp .= si norm = CUDA.mapreduce(x -> norm2(x), +, si) - fill!(si,zero(eltype(so))) + fill!(si,zero(eltype(si))) err = 0.0 - - tol = eps * norm + + tol = tol * norm + iterations = 0 + sumf = scalar_field(Complex{T}, lp) niter = 0 for i in 1:maxiter - A(dws.sAp, U, dws.sp, am0, dws.st, lp) - prod = CUDA.mapreduce(x -> dot(x[1],x[2]), +, zip(dws.sp, dws.sAp)) + A(dws.sAp, U, dws.sp, dpar, dws, lp) + + prod = field_dot(dws.sp,dws.sAp,sumf,lp) + alpha = norm/prod si .= si .+ alpha .* dws.sp @@ -52,4 +74,4 @@ function CG!(si, U, m0, A, lp::SpaceParm, dws::DiracWorkspace) end return niter -end +end \ No newline at end of file