Commit 14810fd

update examples
1 parent a1b38dc commit 14810fd

5 files changed: +77 -4 lines changed

examples/atomic.jl

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+using Revise
+using WGPUCompute
+using Test
+
+# Silly example
+# Need to come up with a better example
+
+empty!(task_local_storage())
+
+function atomiccount_kernel(hist::WgpuArray{T, N1}, x::WgpuArray{T, N2}, iSize::UInt32) where {T, N1, N2}
+    gId = workgroupId.x*workgroupId.y + localId.x
+    stride = workgroupDims.x*workgroupCount.x
+    @wgpuatomic a::UInt32
+    val = x[gId]
+    a = hist[val]
+    while gId < iSize
+        val = x[gId]
+        a += T(1)
+        gId += stride
+    end
+    hist[val] = a
+end
+
+function atomiccount(x::WgpuArray{T, N1}, hist::WgpuArray{S, N2}) where {T, S, N1, N2}
+    y = WgpuArray{UInt32}(undef, nbins)
+    copyto!(y, hist)
+    @wgpukernel(
+        launch=true,
+        workgroupSizes=(64,),
+        workgroupCount=(1,),
+        shmem=(),
+        atomiccount_kernel(y, x, reduce(*, size(x)) |> UInt32)
+    )
+    return y
+end
+
+nbins = 10
+x = WgpuArray{UInt32}(rand(UInt32, 64) .% nbins)
+count = WgpuArray{UInt32}(zeros(UInt32, 10))
+
+z = atomiccount(x, count)
+
+# histogram(x)
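
As a reading aid (not part of this commit), a minimal CPU-side sketch of the histogram this kernel is aiming for could look like the lines below; it assumes the 0-based bin values 0:nbins-1 produced by `rand(UInt32, 64) .% nbins` map onto Julia's 1-based indices, and that `z` will hold the per-bin counts once the kernel example is worked out.

# Hypothetical CPU reference (not in the commit): count each bin value in x.
x_cpu = x |> collect
hist_cpu = zeros(UInt32, nbins)
for v in x_cpu
    hist_cpu[Int(v) + 1] += UInt32(1)   # shift 0-based bin values to 1-based indices
end
# @test hist_cpu == (z |> collect)      # enable once the kernel example is finalized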

examples/cast_kernel.jl

Lines changed: 5 additions & 0 deletions
@@ -10,6 +10,11 @@ function cast_kernel(x::WgpuArray{T, N}, out::WgpuArray{S, N}) where {T, S, N}
 	out[gId] = S(ceil(x[gId]))
 end
 
+function cast_kernel(x::WgpuArray{T, N}, out::WgpuArray{S, N}) where {T, S, N}
+    gId = xDims.x*globalId.y + globalId.x
+    out[gId] = S(ceil(x[gId]))
+end
+
 function cast(S::DataType, x::WgpuArray{T, N}) where {T, N}
 	y = WgpuArray{S}(undef, size(x))
 	@wgpukernel launch=true workgroupSizes=(4, 4) workgroupCount=(2, 2) shmem=() cast_kernel(x, y)
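
The hunk above shows only part of examples/cast_kernel.jl; as a hedged sketch (not part of the commit), a call to `cast` would presumably mirror the other examples, with an 8x8 input matching the (4, 4) workgroup size and (2, 2) workgroup count:

# Hypothetical usage, assuming cast(S, x) returns a WgpuArray{S} as defined above.
x_gpu = WgpuArray{Float32}(rand(Float32, 8, 8))
y_gpu = cast(UInt32, x_gpu)
@test (y_gpu |> collect) == UInt32.(ceil.(x_gpu |> collect))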

examples/localarray.jl

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+using Revise
+using WGPUCompute
+using Test
+
+function localarray_kernel(x::WgpuArray{T, N}, out::WgpuArray{S, N}) where {T, S, N}
+    a = Vec4{Float32}(1.0f0, 2.0f0, 3.0f0, 4.0f0);
+    gId = xDims.x*globalId.y + globalId.x
+    out[gId] = S(ceil(x[gId]))
+end
+
+function localarray(S::DataType, x::WgpuArray{T, N}) where {T, N}
+    y = WgpuArray{S}(undef, size(x))
+    @wgpukernel launch = true workgroupSizes=(4, 4) workgroupCount=(2, 2) shmem=() localarray_kernel(x, y)
+    return y
+end
+
+x = rand(Float32, 8, 8) .- 0.5f0
+
+x_gpu = WgpuArray{Float32}(x)
+z_gpu = localarray(UInt32, x_gpu)
+z_cpu = z_gpu |> collect
+
+z = UInt32.(x .> 0.0)
+
+@test z ≈ z_cpu
+

examples/naive_reduce_mul.jl

Lines changed: 3 additions & 3 deletions
@@ -37,7 +37,7 @@ z = naive_reduce(x, *)
 
 x_cpu = (x |> collect)
 
-sum_cpu = reduce(*, x_cpu)
-sum_gpu = (z|>collect)[1]
+mul_cpu = reduce(*, x_cpu)
+mul_gpu = (z|>collect)[1]
 
-@test sum_cpu ≈ sum_gpu
+@test mul_cpu ≈ mul_gpu

src/array.jl

Lines changed: 0 additions & 1 deletion
@@ -163,7 +163,6 @@ function unsafe_fill!(gpuDevice, dst::WgpuArrayPtr{T}, value::Union{UInt8, Int8}
 	WGPUCore.writeBuffer(gpuDevice.queue, dst.buffer, fill(value, N))
 end
 
-
 mutable struct WgpuArray{T, N} <: AbstractGPUArray{T, N}
 	dims::Dims{N}
 	storageData::Union{Nothing, Vector{T}, Array{T}}
