diff --git a/archai/nlp/models/hf_gpt2/config_hf_gpt2.py b/archai/nlp/models/hf_gpt2/config_hf_gpt2.py index bc787893..ddf565d1 100644 --- a/archai/nlp/models/hf_gpt2/config_hf_gpt2.py +++ b/archai/nlp/models/hf_gpt2/config_hf_gpt2.py @@ -33,7 +33,7 @@ class HfGPT2Config(Config, PretrainedConfig): d_inner: Optional[int] = 2048, dropout: Optional[float] = 0.1, dropatt: Optional[float] = 0.0, - weight_init_std: Optional[float] = 0.0, + weight_init_std: Optional[float] = 0.02, n_layer: Optional[int] = 16, n_head: Optional[int] = 8, embd_pdrop: Optional[float] = 0.0, @@ -111,7 +111,7 @@ class HfGPT2FlexConfig(HfGPT2Config): d_inner: Optional[Union[int, List[int]]] = 2048, dropout: Optional[float] = 0.1, dropatt: Optional[float] = 0.0, - weight_init_std: Optional[float] = 0.0, + weight_init_std: Optional[float] = 0.02, n_layer: Optional[int] = 16, n_head: Optional[Union[int, List[int]]] = 8, embd_pdrop: Optional[float] = 0.0,