clean examples

arhik · arhik · commit fdbe53f527f8 · 2024-07-08T19:37:25.000+05:30
diff --git a/examples/cast_kernel.jl b/examples/cast_kernel.jl
@@ -1,4 +1,5 @@
 using WGPUCompute
+using Test
 
 function cast_kernel(x::WgpuArray{T, N}, out::WgpuArray{S, N}) where {T, S, N}
 	xdim = workgroupDims.x
@@ -15,8 +16,15 @@ function cast(S::DataType, x::WgpuArray{T, N}) where {T, N}
 	return y
 end
 
-x = WgpuArray{Float32}(rand(Float32, 8, 8) .- 0.5f0)
-z = cast(UInt32, x)
+x = rand(Float32, 8, 8) .- 0.5f0  # rand(Float32, ...) draws from [0, 1), so x lies in [-0.5, 0.5) and includes negatives
+
+x_gpu = WgpuArray{Float32}(x)  # wrap the host matrix in a WgpuArray for the kernel
+z_gpu = cast(UInt32, x_gpu)  # run the cast defined above with UInt32 as the target type
+z_cpu = z_gpu |> collect  # collect the result so it can be compared with `z` below
+# NOTE(review): the reference below is 1 where x > 0 and 0 elsewhere; since |x| < 0.5, a truncating float-to-UInt32 cast would give all zeros instead — confirm against cast_kernel
+z = UInt32.(x .> 0.0)
+
+@test z ≈ z_cpu  # compare the host reference with the collected kernel output
 
 # TODO Bool cast is not working yet
 # y = cast(Bool, x)
diff --git a/examples/clamp_kernel.jl b/examples/clamp_kernel.jl
@@ -1,5 +1,6 @@
 using Revise
 using WGPUCompute
+using Test
 
 function clamp_kernel(x::WgpuArray{T, N}, out::WgpuArray{T, N}, minval::T, maxval::T) where {T, N}
     gId = xDims.x * globalId.y + globalId.x
@@ -17,3 +18,9 @@ end
 x = WgpuArray{Float32, 2}(rand(16, 16))
 
 y = Base.clamp(x, 0.2f0, 0.5f0)
+y_cpu = y |> collect  # collect the clamped result for the checks below
+# NOTE(review): these equalities hold only if the unseeded random input has at least one value <= 0.2 and one >= 0.5 — TODO confirm this is acceptable for a 16x16 sample
+@testset "Clamp minimum and maximum" begin
+	@test minimum(y_cpu) == 0.2f0  # smallest element should equal the lower bound passed to clamp
+	@test maximum(y_cpu) == 0.5f0  # largest element should equal the upper bound passed to clamp
+end
diff --git a/examples/scan_kernel.jl b/examples/scan_kernel.jl
@@ -41,11 +41,12 @@ end
 
 function naive_prefix_scan(x::WgpuArray{T, N}) where {T, N}
 	y = similar(x)
-	wgsize = div(reduce(*, size(x)), 256)
+	maxthreads = 256
+	wgsize = div(reduce(*, size(x)), maxthreads)
 	p = WgpuArray{T, N}(zeros(wgsize))
 	@wgpukernel(
 		launch=true,
-		workgroupSizes = (256,),
+		workgroupSizes = (maxthreads,),
 		workgroupCount = (wgsize,),
 		shmem = (),
 		naive_prefix_scan_kernel(x, y, p)
@@ -54,15 +55,15 @@ function naive_prefix_scan(x::WgpuArray{T, N}) where {T, N}
 	partials = WgpuArray{T, N}(pscan)
 	@wgpukernel(
 		launch=true,
-		workgroupSizes = (256,),
+		workgroupSizes = (maxthreads,),
 		workgroupCount = (wgsize,),
 		shmem = (),
 		naive_prefix_partials_scatter_kernel(y, partials)
 	)
 	return y
 end
 
-x = WgpuArray{Float32}(rand(Float32, 2^16))
+x = WgpuArray{Float32}(rand(Float32, 2^22))  # 2^22 elements; with maxthreads = 256 in naive_prefix_scan this gives wgsize = 2^14
 z = naive_prefix_scan(x,)
 
 x_cpu = (x |> collect)
diff --git a/examples/tiled_matmul_kernel.jl b/examples/tiled_matmul_kernel.jl
@@ -84,7 +84,10 @@ Base.:*(x::WgpuArray{T, N}, y::WgpuArray{T, N})  where {T, N} = tiled_matmul(x,
 
 z = x*y
 
-z_cpu = (x |> collect)*(y |> collect)
+x_cpu = (x |> collect);  # collect x once and keep it under a name
+y_cpu = (y |> collect);  # same for y
+
+z_cpu = x_cpu*y_cpu  # reference product built from the collected operands
 
 @test z_cpu ≈ (z |> collect)