Log operator warnings only in verbose mode (#5917)

Olatunji Ruwase 2024-08-13 21:10:17 -04:00 committed by GitHub
Parent 6e5d58d248
Commit 0f2d485c27
No known key found for this signature
GPG key ID: B5690EEEBB952194
9 changed files with 75 additions and 40 deletions
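The change is identical across all nine op builders: every `self.warning(...)` call inside `is_compatible` is gated behind an `if verbose:` check, and the default flips from `verbose=True` to `verbose=False`, so compatibility probes stay silent unless a caller opts in. A minimal self-contained sketch of the pattern (the `ExampleBuilder` class and its caller are illustrative stand-ins, not part of DeepSpeed):

# Sketch of the verbose-gated compatibility check; ExampleBuilder is
# hypothetical and stands in for the CUDAOpBuilder/OpBuilder subclasses below.
class ExampleBuilder:
    NAME = "example_op"

    def warning(self, msg):
        print(f"[WARNING] {self.NAME}: {msg}")

    def is_compatible(self, verbose=False):  # default was verbose=True before this change
        try:
            import torch  # noqa: F401
        except ImportError:
            if verbose:  # the warning only reaches the log when explicitly requested
                self.warning("Please install torch if trying to pre-compile kernels")
            return False
        return True

builder = ExampleBuilder()
builder.is_compatible()              # silent when the check fails (new default)
builder.is_compatible(verbose=True)  # logs the warning, matching the old behavior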

View file

@@ -41,18 +41,21 @@ class EvoformerAttnBuilder(CUDAOpBuilder):
             args.append(f"-DGPU_ARCH={major}{minor}")
         return args
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile kernels")
             return False
         if self.cutlass_path is None:
-            self.warning("Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH")
+            if verbose:
+                self.warning("Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH")
             return False
         with open(f'{self.cutlass_path}/CHANGELOG.md', 'r') as f:
             if '3.1.0' not in f.read():
-                self.warning("Please use CUTLASS version >= 3.1.0")
+                if verbose:
+                    self.warning("Please use CUTLASS version >= 3.1.0")
                 return False
         cuda_okay = True
         if not self.is_rocm_pytorch() and torch.cuda.is_available():  #ignore-cuda
@@ -60,10 +63,12 @@ class EvoformerAttnBuilder(CUDAOpBuilder):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 7:
-                self.warning("Please use a GPU with compute capability >= 7.0")
+                if verbose:
+                    self.warning("Please use a GPU with compute capability >= 7.0")
                 cuda_okay = False
             if torch_cuda_major < 11 or sys_cuda_major < 11:
-                self.warning("Please use CUDA 11+")
+                if verbose:
+                    self.warning("Please use CUDA 11+")
                 cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay

View file

@@ -22,11 +22,12 @@ class FPQuantizerBuilder(CUDAOpBuilder):
     def absolute_name(self):
         return f'deepspeed.ops.fp_quantizer.{self.NAME}_op'
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -35,17 +36,20 @@ class FPQuantizerBuilder(CUDAOpBuilder):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 8:
-                self.warning("NVIDIA Inference is only supported on Ampere and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Ampere and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
 
         try:
             import triton
         except ImportError:
-            self.warning(f"please install triton==2.3.0 or 2.3.1 if you want to use the FP Quantizer Kernels")
+            if verbose:
+                self.warning(f"please install triton==2.3.0 or 2.3.1 if you want to use the FP Quantizer Kernels")
             return False
 
         # triton 2.3.0 and 2.3.1 are okay and the only versions released in 2.3.x before 3.x was released
@@ -59,9 +63,10 @@ class FPQuantizerBuilder(CUDAOpBuilder):
             triton_mismatch = major != "2" or minor != "3"
 
         if triton_mismatch:
-            self.warning(
-                f"FP Quantizer is using an untested triton version ({installed_triton}), only 2.3.0 and 2.3.1 are known to be compatible with these kernels"
-            )
+            if verbose:
+                self.warning(
+                    f"FP Quantizer is using an untested triton version ({installed_triton}), only 2.3.0 and 2.3.1 are known to be compatible with these kernels"
+                )
             return False
 
         return super().is_compatible(verbose) and cuda_okay

View file

@@ -23,7 +23,8 @@ class InferenceCoreBuilder(CUDAOpBuilder):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -32,11 +33,13 @@ class InferenceCoreBuilder(CUDAOpBuilder):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay

View file

@@ -22,7 +22,8 @@ class InferenceCutlassBuilder(CUDAOpBuilder):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -31,11 +32,13 @@ class InferenceCutlassBuilder(CUDAOpBuilder):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay

View file

@@ -23,7 +23,8 @@ class RaggedOpsBuilder(CUDAOpBuilder):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -32,11 +33,13 @@ class RaggedOpsBuilder(CUDAOpBuilder):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay

View file

@@ -23,7 +23,8 @@ class RaggedUtilsBuilder(CUDAOpBuilder):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -32,11 +33,13 @@ class RaggedUtilsBuilder(CUDAOpBuilder):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay

View file

@@ -27,45 +27,51 @@ class SparseAttnBuilder(OpBuilder):
     def cxx_args(self):
         return ['-O2', '-fopenmp']
 
-    def is_compatible(self, verbose=True):
+    def is_compatible(self, verbose=False):
         # Check to see if llvm and cmake are installed since they are dependencies
         #required_commands = ['llvm-config|llvm-config-9', 'cmake']
         #command_status = list(map(self.command_exists, required_commands))
         #deps_compatible = all(command_status)
 
         if self.is_rocm_pytorch():
-            self.warning(f'{self.NAME} is not compatible with ROCM')
+            if verbose:
+                self.warning(f'{self.NAME} is not compatible with ROCM')
             return False
 
         try:
             import torch
         except ImportError:
-            self.warning(f"unable to import torch, please install it first")
+            if verbose:
+                self.warning(f"unable to import torch, please install it first")
             return False
 
         # torch-cpu will not have a cuda version
         if torch.version.cuda is None:
             cuda_compatible = False
-            self.warning(f"{self.NAME} cuda is not available from torch")
+            if verbose:
+                self.warning(f"{self.NAME} cuda is not available from torch")
         else:
             major, minor = torch.version.cuda.split('.')[:2]
             cuda_compatible = (int(major) == 10 and int(minor) >= 1) or (int(major) >= 11)
             if not cuda_compatible:
-                self.warning(f"{self.NAME} requires CUDA version 10.1+")
+                if verbose:
+                    self.warning(f"{self.NAME} requires CUDA version 10.1+")
 
         TORCH_MAJOR = int(torch.__version__.split('.')[0])
         TORCH_MINOR = int(torch.__version__.split('.')[1])
         torch_compatible = (TORCH_MAJOR == 1 and TORCH_MINOR >= 5)
         if not torch_compatible:
-            self.warning(
-                f'{self.NAME} requires a torch version >= 1.5 and < 2.0 but detected {TORCH_MAJOR}.{TORCH_MINOR}')
+            if verbose:
+                self.warning(
+                    f'{self.NAME} requires a torch version >= 1.5 and < 2.0 but detected {TORCH_MAJOR}.{TORCH_MINOR}')
 
         try:
             import triton
         except ImportError:
             # auto-install of triton is broken on some systems, reverting to manual install for now
             # see this issue: https://github.com/microsoft/DeepSpeed/issues/1710
-            self.warning(f"please install triton==1.0.0 if you want to use sparse attention")
+            if verbose:
+                self.warning(f"please install triton==1.0.0 if you want to use sparse attention")
             return False
 
         if pkg_version:
@@ -76,7 +82,9 @@ class SparseAttnBuilder(OpBuilder):
             triton_mismatch = installed_triton != "1.0.0"
 
         if triton_mismatch:
-            self.warning(f"using untested triton version ({installed_triton}), only 1.0.0 is known to be compatible")
+            if verbose:
+                self.warning(
+                    f"using untested triton version ({installed_triton}), only 1.0.0 is known to be compatible")
             return False
 
         return super().is_compatible(verbose) and torch_compatible and cuda_compatible

View file

@@ -21,7 +21,8 @@ class SpatialInferenceBuilder(CUDAOpBuilder):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -31,7 +32,8 @@ class SpatialInferenceBuilder(CUDAOpBuilder):
             cuda_capability = torch.cuda.get_device_properties(0).major
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay

View file

@@ -21,7 +21,8 @@ class InferenceBuilder(CUDAOpBuilder):
         try:
             import torch
         except ImportError:
-            self.warning("Please install torch if trying to pre-compile inference kernels")
+            if verbose:
+                self.warning("Please install torch if trying to pre-compile inference kernels")
             return False
 
         cuda_okay = True
@@ -30,11 +31,13 @@ class InferenceBuilder(CUDAOpBuilder):
             torch_cuda_major = int(torch.version.cuda.split('.')[0])
             cuda_capability = torch.cuda.get_device_properties(0).major
             if cuda_capability < 6:
-                self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
+                if verbose:
+                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                 cuda_okay = False
             if cuda_capability >= 8:
                 if torch_cuda_major < 11 or sys_cuda_major < 11:
-                    self.warning("On Ampere and higher architectures please use CUDA 11+")
+                    if verbose:
+                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                     cuda_okay = False
         return super().is_compatible(verbose) and cuda_okay