1 file changed
+1
-1
lines changed- CHANGELOG.md+7-2
- CMakeLists.txt+24-3
- PUBLICATIONS.md+7
- README.md+9-4
- cmake/version.h.in-38
- cmake/version_extended.h.in+34
- examples/02_dump_reg_shmem/CMakeLists.txt+1
- examples/08_turing_tensorop_gemm/turing_tensorop_gemm.cu+2-2
- examples/56_hopper_ptr_array_batched_gemm/56_hopper_ptr_array_batched_gemm.cu+7-7
- examples/56_hopper_ptr_array_batched_gemm/CMakeLists.txt+10-8
- examples/57_hopper_grouped_gemm/57_hopper_grouped_gemm.cu+96-49
- examples/57_hopper_grouped_gemm/CMakeLists.txt+10
- include/cute/arch/copy_sm90_desc.hpp+1-1
- include/cute/atom/mma_atom.hpp+2
- include/cute/util/print.hpp+2-2
- include/cute/util/type_traits.hpp+3
- include/cutlass/arch/mma_sm90.h+4
- include/cutlass/bfloat16.h+1
- include/cutlass/detail/layout.hpp+35-1
- include/cutlass/epilogue/collective/builders/sm90_builder.inl+12-7
- include/cutlass/epilogue/collective/default_epilogue.hpp+1
- include/cutlass/epilogue/collective/default_epilogue_array.hpp+32-18
- include/cutlass/epilogue/collective/sm90_epilogue_tma_warpspecialized.hpp+76-38
- include/cutlass/epilogue/dispatch_policy.hpp+1-2
- include/cutlass/epilogue/fusion/sm90_callbacks_tma_warpspecialized.hpp+28
- include/cutlass/epilogue/fusion/sm90_visitor_store_tma_warpspecialized.hpp+1
- include/cutlass/epilogue/thread/linear_combination.h+57-12
- include/cutlass/epilogue/threadblock/default_epilogue_tensor_op_row_broadcast.h-183
- include/cutlass/epilogue/threadblock/predicated_tile_iterator_row_broadcast.h-519
- include/cutlass/gemm/collective/builders/sm90_gmma_builder.inl+4-8
- include/cutlass/gemm/collective/sm90_mma_array_tma_gmma_ss_warpspecialized.hpp+45-29
- include/cutlass/gemm/device/gemm_sparse_row_broadcast.h-514
- include/cutlass/gemm/dispatch_policy.hpp+4-7
- include/cutlass/gemm/group_array_problem_shape.hpp+12
- include/cutlass/gemm/kernel/default_gemm_sparse_row_broadcast.h-191
- include/cutlass/gemm/kernel/sm90_gemm_array_tma_warpspecialized_cooperative.hpp+30-35
- include/cutlass/gemm/kernel/sm90_gemm_tma.hpp+5-7
- include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized.hpp+5-7
- include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_cooperative.hpp+5-7
- include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp+5-7
- include/cutlass/gemm/kernel/sm90_gemm_warpspecialized.hpp+5-7
- include/cutlass/gemm/kernel/sm90_gemm_warpspecialized_cooperative.hpp+5-7
- include/cutlass/gemm/kernel/sm90_gemm_warpspecialized_pingpong.hpp+5-7
- include/cutlass/gemm/kernel/sm90_tile_scheduler_group.hpp+140-86
- include/cutlass/gemm/kernel/sparse_gemm_row_broadcast.h-400
- include/cutlass/gemm/kernel/tile_scheduler_params.h+14-6
- include/cutlass/version.h+80
- pyproject.toml+2-2
- python/cutlass/__init__.py+3-3
- python/cutlass/backend/c_types.py+6-2
- python/cutlass/backend/epilogue.py+23-1
- python/cutlass/backend/evt/frontend/frontend_base.py+2-2
- python/cutlass/backend/evt/passes/graph_drawer.py-16
- python/cutlass/backend/gemm_operation.py+28-18
- python/setup_library.py+1-1
- python/setup_pycute.py+1-1
- test/unit/gemm/device/CMakeLists.txt+1
- test/unit/gemm/device/gemm_f16n_f16n_f16t_tensor_op_f32_sparse_sm80.cu-19
- test/unit/gemm/device/sm90_gemm_f16_f16_f16_tensor_op_f32_cluster_warpspecialized_cooperative_aux_store.cu+685
- test/unit/gemm/device/testbed_sparse.h+7-20
- tools/util/include/cutlass/util/packed_stride.hpp+1-1
0 commit comments