Shuxin Zheng 2021-06-10 18:04:55 +08:00 committed by GitHub
Parent 9537cf59a8
Commit 7bdc032d5d
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
1 changed file: 44 additions and 0 deletions

model.py Normal file

@@ -0,0 +1,44 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

from data import get_dataset
from lr import PolynomialDecayLR
import torch
import math
import torch.nn as nn
import pytorch_lightning as pl
from utils.flag import flag, flag_bounded


def init_model_params(module, n_layers):
    if isinstance(module, nn.Linear):
        module.weight.data.normal_(mean=0.0, std=0.02 / math.sqrt(n_layers))
        if module.bias is not None:
            module.bias.data.zero_()
    if isinstance(module, nn.Embedding):
        module.weight.data.normal_(mean=0.0, std=0.02)
# pre_ln: On Layer Normalization in the Transformer Architecture (http://proceedings.mlr.press/v119/xiong20b/xiong20b.pdf)
class EncoderLayer(nn.Module):
    def __init__(self, hidden_size, ffn_size, dropout_rate, attention_dropout_rate, head_size):
        super(EncoderLayer, self).__init__()

        self.self_attention_norm = nn.LayerNorm(hidden_size)
        self.self_attention = MultiHeadAttention(hidden_size, attention_dropout_rate, head_size)
        self.self_attention_dropout = nn.Dropout(dropout_rate)

        self.ffn_norm = nn.LayerNorm(hidden_size)
        self.ffn = FeedForwardNetwork(hidden_size, ffn_size, dropout_rate)
        self.ffn_dropout = nn.Dropout(dropout_rate)

    def forward(self, x, attn_bias=None):
        # Pre-LN self-attention sub-layer: normalize, attend, dropout, then residual add.
        y = self.self_attention_norm(x)
        y = self.self_attention(y, y, y, attn_bias)
        y = self.self_attention_dropout(y)
        x = x + y

        # Pre-LN feed-forward sub-layer with the same normalize/dropout/residual pattern.
        y = self.ffn_norm(x)
        y = self.ffn(y)
        y = self.ffn_dropout(y)
        x = x + y
        return x
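
Note that this hunk references MultiHeadAttention and FeedForwardNetwork without defining them. As an illustration only, here is a minimal sketch of modules with the constructor and call signatures EncoderLayer assumes (the attention is invoked as self_attention(q, k, v, attn_bias)); this is an assumption about the intended interface, not the code added in this commit:

# Sketch only (assumed interface): standard multi-head attention with an additive
# bias on the attention scores, and a two-layer feed-forward block.
import torch
import torch.nn as nn


class FeedForwardNetwork(nn.Module):
    def __init__(self, hidden_size, ffn_size, dropout_rate):
        super(FeedForwardNetwork, self).__init__()
        self.layer1 = nn.Linear(hidden_size, ffn_size)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(dropout_rate)
        self.layer2 = nn.Linear(ffn_size, hidden_size)

    def forward(self, x):
        return self.layer2(self.dropout(self.gelu(self.layer1(x))))


class MultiHeadAttention(nn.Module):
    def __init__(self, hidden_size, attention_dropout_rate, head_size):
        super(MultiHeadAttention, self).__init__()
        assert hidden_size % head_size == 0
        self.head_size = head_size                      # number of attention heads
        self.att_size = hidden_size // head_size        # per-head dimension
        self.scale = self.att_size ** -0.5
        self.linear_q = nn.Linear(hidden_size, hidden_size)
        self.linear_k = nn.Linear(hidden_size, hidden_size)
        self.linear_v = nn.Linear(hidden_size, hidden_size)
        self.att_dropout = nn.Dropout(attention_dropout_rate)
        self.output_layer = nn.Linear(hidden_size, hidden_size)

    def forward(self, q, k, v, attn_bias=None):
        batch, n, _ = q.size()
        # Project and reshape to [batch, heads, n, att_size].
        q = self.linear_q(q).view(batch, n, self.head_size, self.att_size).transpose(1, 2)
        k = self.linear_k(k).view(batch, n, self.head_size, self.att_size).transpose(1, 2)
        v = self.linear_v(v).view(batch, n, self.head_size, self.att_size).transpose(1, 2)
        scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale
        if attn_bias is not None:
            # attn_bias assumed broadcastable to [batch, heads, n, n].
            scores = scores + attn_bias
        attn = self.att_dropout(torch.softmax(scores, dim=-1))
        out = torch.matmul(attn, v).transpose(1, 2).contiguous().view(batch, n, -1)
        return self.output_layer(out)

The attn_bias hook is what allows a graph model to inject structural information (for example, spatial or edge encodings) additively into the attention scores before the softmax.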