Correct TF formatting to exclude LayerNorms from weight decay (#4448)
* Exclude LayerNorms from weight decay
* Include both formats of layer norm
This commit is contained in:
Родитель
49c06132df
Коммит
e708bb75bf
|
@@ -75,7 +75,7 @@ def create_optimizer(init_lr, num_train_steps, num_warmup_steps, end_lr=0.0, opt
|
|||
beta_1=0.9,
|
||||
beta_2=0.999,
|
||||
epsilon=1e-6,
|
||||
- exclude_from_weight_decay=["layer_norm", "bias"],
|
||||
+ exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
|
||||
)
|
||||
|
||||
return optimizer
|
||||
|
|
Загрузка…
Ссылка в новой задаче