Remove PerRow granularity constraint for sm89 hardware

drisspg · drisspg · commit 26dba5bdeb04 · 2025-06-01T17:39:08.000-07:00
stack-info: PR: #2281, branch: drisspg/stack/61
diff --git a/.github/workflows/float8_test.yml b/.github/workflows/float8_test.yml
@@ -55,3 +55,4 @@ jobs:
         pip install .
         pytest test/float8 --verbose -s
         pytest test/integration --verbose -s
+        pytest test/dtypes/test_affine_quantized_float.py --verbose -s
diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py
@@ -76,9 +76,7 @@ class TestAffineQuantizedFloat8Compile(InductorTestCase):
     @common_utils.parametrize("dtype", [torch.bfloat16, torch.float32])
     @common_utils.parametrize("mode", ["dynamic", "weight-only", "static"])
     @common_utils.parametrize("compile", [True, False])
-    @common_utils.parametrize(
-        "granularity", [PerTensor(), PerRow()] if is_sm_at_least_90() else [PerTensor()]
-    )
+    @common_utils.parametrize("granularity", [PerTensor(), PerRow()])
     # Inputs are (M,..), K, N
     @common_utils.parametrize(
         "sizes",
@@ -420,9 +418,7 @@ def test_dequantize_affine_float8_scale_broadcasting(self):
     @unittest.skipIf(
         not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9"
     )
-    @common_utils.parametrize(
-        "granularity", [PerTensor(), PerRow()] if is_sm_at_least_90() else [PerTensor()]
-    )
+    @common_utils.parametrize("granularity", [PerTensor(), PerRow()])
     def test_float8_tensor_slicing_basic(self, granularity):
         """Test basic slicing operations on Float8 tensors"""
         device = "cuda"
@@ -555,9 +551,7 @@ def test_float8_tensor_slicing_edge_cases(self):
     @unittest.skipIf(
         not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9"
     )
-    @common_utils.parametrize(
-        "granularity", [PerTensor(), PerRow()] if is_sm_at_least_90() else [PerTensor()]
-    )
+    @common_utils.parametrize("granularity", [PerTensor(), PerRow()])
     def test_float8_tensor_slicing_functional_correctness(self, granularity):
         """Test that sliced tensors produce correct results in computations"""
         device = "cuda"
diff --git a/torchao/float8/inference.py b/torchao/float8/inference.py
@@ -19,7 +19,6 @@
 from torchao.utils import (
     is_MI300,
     is_sm_at_least_89,
-    is_sm_at_least_90,
 )
 
 Tensor = torch.Tensor
@@ -168,13 +167,11 @@ def _check_hardware_support(
         ValueError: If invalid granularity type is provided
     """
     for _granularity in granularities:
-        if isinstance(_granularity, PerTensor):
-            assert is_sm_at_least_89() or is_MI300(), (
-                "PerTensor quantization only works for CUDA>=8.9 and MI300+"
-            )
-        elif isinstance(_granularity, PerRow):
-            assert is_sm_at_least_90() or is_MI300(), (
-                "PerRow quantization only works for CUDA>=9.0 and MI300+"
+        if not isinstance(_granularity, (PerTensor, PerRow)):
+            raise ValueError(
+                f"Invalid granularity type: {_granularity}, only PerTensor or PerRow are supported."
             )
-        else:
-            raise ValueError(f"Invalid granularity type: {_granularity}")
+
+        assert is_sm_at_least_89() or is_MI300(), (
+            "Float8 dynamic quantization requires CUDA compute capability ≥8.9 or MI300+."
+        )