Mirror of https://github.com/microsoft/DeepSpeed.git
`warn` to `warning` (#6952)
`warn` is deprecated, see https://docs.python.org/3/library/logging.html#logging.Logger.warning

```
DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead
```
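For context, a minimal standalone sketch (not part of the commit; the `demo` logger name is arbitrary) showing the deprecated alias next to its replacement:

```python
# Minimal sketch: Logger.warn is a deprecated alias of Logger.warning.
import logging
import warnings

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("demo")

# DeprecationWarning is hidden by default; surface it for this demo.
warnings.simplefilter("always", DeprecationWarning)

logger.warn("old spelling")     # emits DeprecationWarning: use 'warning' instead
logger.warning("new spelling")  # preferred call, no DeprecationWarning
```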
Parent: fae714d6bd
Commit: 05eaf3d1ca
```diff
@@ -178,7 +178,7 @@ def get_accelerator():
     if accelerator_name is None:
         # borrow this log from PR#5084
         if accel_logger is not None:
-            accel_logger.warn(
+            accel_logger.warning(
                 "Setting accelerator to CPU. If you have GPU or other accelerator, we were unable to detect it.")
         # cpu added as catch-all when accelerator detection fails
         accelerator_name = "cpu"
@@ -28,7 +28,7 @@ class ZeROOptimizer(DeepSpeedOptimizer):
 
         tp_rank = bwc_tensor_model_parallel_rank(mpu=self.mpu)
         if self.mpu is None:
-            logger.warn("MPU is not provided, setting tp size to 1 in checkpoint loading.")
+            logger.warning("MPU is not provided, setting tp size to 1 in checkpoint loading.")
             tp_world_size = 1
         else:
             tp_world_size = self.mpu.get_slice_parallel_world_size() if hasattr(self.mpu, "get_slice_parallel_world_size") \
@@ -3120,7 +3120,7 @@ class DeepSpeedEngine(Module):
             if bf16_mode is not self.bfloat16_enabled():
                 checkpoint_bit16 = BFLOAT16 if bf16_mode else FP16
                 engine_bit16 = BFLOAT16 if self.bfloat16_enabled() else FP16
-                logger.warn(f'Loading {checkpoint_bit16} zero checkpoints into {engine_bit16} training engine')
+                logger.warning(f'Loading {checkpoint_bit16} zero checkpoints into {engine_bit16} training engine')
             return self._get_all_zero_checkpoint_state_dicts(zero_ckpt_names)
 
         return None
@@ -3276,7 +3276,7 @@ class DeepSpeedEngine(Module):
 
             local_expert_id = None
             if not m:
-                logger.warn(f'No expert found in key {key}.')
+                logger.warning(f'No expert found in key {key}.')
             else:
                 local_expert_id = m.group(1)
 
@@ -508,7 +508,7 @@ class OneCycle(object):
     def _initialize_momentum(self, optimizer, cycle_min_mom, cycle_max_mom, decay_mom_rate, last_batch_iteration):
         if 'betas' not in optimizer.defaults:
            optimizer_name = type(optimizer).__name__
-            logger.warn(
+            logger.warning(
                 f"cycle_momentum is disabled because optimizer {optimizer_name} does not support momentum, no betas attribute in defaults"
             )
             self.cycle_momentum = False
@@ -614,7 +614,7 @@ class DeepSpeedZeroOptimizer(ZeROOptimizer):
             assert self.contiguous_gradients, "Contiguous Gradients in ZeRO Stage 2 must be set to True for MoE. Other code paths are not tested with MoE"
         # NOTE: To run ZeRO stage 1 with MoE, we need to set self.contiguous_gradients to True or ignore the assertion
         if not self.partition_gradients and not self.contiguous_gradients:
-            logger.warn(
+            logger.warning(
                 "ZeRO Stage 1 has not been thoroughly tested with MoE. This configuration is still experimental.")
         assert self.reduce_scatter, "Reduce Scatter in ZeRO Stage 2 must be set to True for MoE. Other code paths are not tested with MoE"
```
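To guard against the deprecated spelling creeping back in, one hedged option (not part of this commit, shown only as a sketch) is to escalate this specific DeprecationWarning to an error in a test environment:

```python
# Sketch only (not from the DeepSpeed repo): turn the DeprecationWarning emitted
# by Logger.warn into an error so lingering call sites fail fast in tests.
import logging
import warnings

warnings.filterwarnings(
    "error",
    message="The 'warn' method is deprecated, use 'warning' instead",
    category=DeprecationWarning,
)

logging.getLogger("demo").warning("ok")   # the supported call passes untouched
# logging.getLogger("demo").warn("boom")  # would now raise DeprecationWarning as an exception
```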