Mirror of https://github.com/microsoft/DeepSpeed.git
`warn` to `warning` (#6952)
`warn` is deprecated, see https://docs.python.org/3/library/logging.html#logging.Logger.warning

```
DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead
```
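For context, a minimal standalone sketch (not part of the commit; the `demo` logger name is arbitrary) showing the deprecated alias next to its replacement:

```python
# Minimal sketch: Logger.warn is a deprecated alias of Logger.warning.
import logging
import warnings

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("demo")

# DeprecationWarning is hidden by default; surface it for this demo.
warnings.simplefilter("always", DeprecationWarning)

logger.warn("old spelling")     # emits DeprecationWarning: use 'warning' instead
logger.warning("new spelling")  # preferred call, no DeprecationWarning
```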
Parent: fae714d6bd
Commit: 05eaf3d1ca
```diff
@@ -178,7 +178,7 @@ def get_accelerator():
     if accelerator_name is None:
         # borrow this log from PR#5084
         if accel_logger is not None:
-            accel_logger.warn(
+            accel_logger.warning(
                 "Setting accelerator to CPU. If you have GPU or other accelerator, we were unable to detect it.")
         # cpu added as catch-all when accelerator detection fails
         accelerator_name = "cpu"
@@ -28,7 +28,7 @@ class ZeROOptimizer(DeepSpeedOptimizer):
 
         tp_rank = bwc_tensor_model_parallel_rank(mpu=self.mpu)
         if self.mpu is None:
-            logger.warn("MPU is not provided, setting tp size to 1 in checkpoint loading.")
+            logger.warning("MPU is not provided, setting tp size to 1 in checkpoint loading.")
             tp_world_size = 1
         else:
             tp_world_size = self.mpu.get_slice_parallel_world_size() if hasattr(self.mpu, "get_slice_parallel_world_size") \
@@ -3120,7 +3120,7 @@ class DeepSpeedEngine(Module):
             if bf16_mode is not self.bfloat16_enabled():
                 checkpoint_bit16 = BFLOAT16 if bf16_mode else FP16
                 engine_bit16 = BFLOAT16 if self.bfloat16_enabled() else FP16
-                logger.warn(f'Loading {checkpoint_bit16} zero checkpoints into {engine_bit16} training engine')
+                logger.warning(f'Loading {checkpoint_bit16} zero checkpoints into {engine_bit16} training engine')
             return self._get_all_zero_checkpoint_state_dicts(zero_ckpt_names)
 
         return None
@@ -3276,7 +3276,7 @@ class DeepSpeedEngine(Module):
 
             local_expert_id = None
             if not m:
-                logger.warn(f'No expert found in key {key}.')
+                logger.warning(f'No expert found in key {key}.')
             else:
                 local_expert_id = m.group(1)
 
@@ -508,7 +508,7 @@ class OneCycle(object):
     def _initialize_momentum(self, optimizer, cycle_min_mom, cycle_max_mom, decay_mom_rate, last_batch_iteration):
         if 'betas' not in optimizer.defaults:
            optimizer_name = type(optimizer).__name__
-            logger.warn(
+            logger.warning(
                 f"cycle_momentum is disabled because optimizer {optimizer_name} does not support momentum, no betas attribute in defaults"
             )
             self.cycle_momentum = False
@@ -614,7 +614,7 @@ class DeepSpeedZeroOptimizer(ZeROOptimizer):
             assert self.contiguous_gradients, "Contiguous Gradients in ZeRO Stage 2 must be set to True for MoE. Other code paths are not tested with MoE"
         # NOTE: To run ZeRO stage 1 with MoE, we need to set self.contiguous_gradients to True or ignore the assertion
         if not self.partition_gradients and not self.contiguous_gradients:
-            logger.warn(
+            logger.warning(
                 "ZeRO Stage 1 has not been thoroughly tested with MoE. This configuration is still experimental.")
         assert self.reduce_scatter, "Reduce Scatter in ZeRO Stage 2 must be set to True for MoE. Other code paths are not tested with MoE"
```
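To guard against the deprecated spelling creeping back in, one hedged option (not part of this commit, shown only as a sketch) is to escalate this specific DeprecationWarning to an error in a test environment:

```python
# Sketch only (not from the DeepSpeed repo): turn the DeprecationWarning emitted
# by Logger.warn into an error so lingering call sites fail fast in tests.
import logging
import warnings

warnings.filterwarnings(
    "error",
    message="The 'warn' method is deprecated, use 'warning' instead",
    category=DeprecationWarning,
)

logging.getLogger("demo").warning("ok")   # the supported call passes untouched
# logging.getLogger("demo").warn("boom")  # would now raise DeprecationWarning as an exception
```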