[inductor] check block options after broadcasting and singleton dims have been removed (pytorch#161602)

kundaMwiza · pytorchmergebot · commit b994f6e3b331 · 2025-08-30T08:10:51.000Z
This will allow for some more cases to use tensor descriptors e.g. before the following block params would not match because the innermost dimension does not have stride 1 ```python block_params=BlockParameters(shape=[64, 4, 1, 1], block_shape=[((XBLOCK + 3)//4), Min(4, XBLOCK), 1, 1], strides=[0, 1, 0, 0], offsets=[(xoffset//4), ModularIndexing(xoffset, 1, 4), 0, 0]) ``` After broadcasting dimensions and singleton dimensions are removed: ```python block_params=BlockParameters(shape=[4], block_shape=[Min(4, XBLOCK)], strides=[1], offsets=[ModularIndexing(xoffset, 1, 4)]) ``` Pull Request resolved: pytorch#161602 Approved by: https://github.com/jansel
diff --git a/test/inductor/test_torchinductor_strided_blocks.py b/test/inductor/test_torchinductor_strided_blocks.py
@@ -78,7 +78,6 @@ def xfail_if_use_tensor_descriptor(fn):
         "test_2d_reduction_odd_shapes_view_size1_num_block_pointers_3_num_triton_kernels_2_reduction_op1",
         "test_broadcast_prefer_nd_tiling_False_x_size0_y_size0",
         "test_broadcast_prefer_nd_tiling_False_x_size2_y_size2",
-        "test_broadcast_prefer_nd_tiling_False_x_size3_y_size3",
         "test_broadcast_prefer_nd_tiling_True_x_size0_y_size0",
         "test_broadcast_prefer_nd_tiling_True_x_size2_y_size2",
         "test_broadcast_with_singleton_dims",
diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py
@@ -2298,27 +2298,31 @@ def match_block_expr() -> Optional[BlockDescriptorOptions]:
 
                 # Form the block pointer or TMA descriptor.
                 self.filter_masks(mask_vars)
-                options_class: type[BlockDescriptorOptions]
-                if config.triton.use_block_ptr:
-                    options_class = BlockPtrOptions
-                else:
+
+                options_class = (
+                    BlockPtrOptions
+                    if config.triton.use_block_ptr
+                    else TensorDescriptorOptions
+                )
+                options = options_class.create(
+                    params=block_params,
+                    constant_offset=offset,
+                    range_trees=range_trees,
+                    mask_vars=mask_vars,
+                    get_max_block=self.max_block,
+                )
+
+                if options_class == TensorDescriptorOptions:
                     nonlocal tma_compatibility_checker
                     tma_compatibility_checker = cast(
                         TMACompatibilityChecker, tma_compatibility_checker
                     )
                     if not tma_compatibility_checker.are_block_parameters_compatible(
-                        block_params
+                        options.params
                     ):
                         return None
-                    options_class = TensorDescriptorOptions
 
-                return options_class.create(
-                    params=block_params,
-                    constant_offset=offset,
-                    range_trees=range_trees,
-                    mask_vars=mask_vars,
-                    get_max_block=self.max_block,
-                )
+                return options
 
             # Return a block pointer, if indexing matches the pattern.
             options = match_block_expr()