init files for graph model decoder

This commit is contained in:
hyeh20 2020-09-14 13:48:04 -04:00
Родитель 87d32ae6b2
Коммит d92e105f88
5 изменённых файлов: 904 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,428 @@
from __future__ import print_function
import numpy as np
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
# The following gather functions select edge/node features at the given neighbor indices.
def gather_edges(edges, neighbor_idx):
    """Pick out the edge features that belong to each node's neighbors.

    Features [B,N,N,C] at neighbor indices [B,N,K] => neighbor features [B,N,K,C].
    """
    num_channels = edges.size(-1)
    # Repeat every index across the channel axis so it can drive torch.gather.
    index = neighbor_idx.unsqueeze(-1).expand(-1, -1, -1, num_channels)
    return torch.gather(edges, 2, index)
def gather_nodes(nodes, neighbor_idx):
    """Collect, for every node, the feature vectors of its neighbors.

    Features [B,N,C] at neighbor indices [B,N,K] => neighbor features [B,N,K,C].
    """
    batch_size = neighbor_idx.shape[0]
    # Collapse the (node, neighbor) axes: [B,N,K] => [B,NK], then tile the
    # index over the channel axis: [B,NK] => [B,NK,C].
    flat_index = neighbor_idx.view((batch_size, -1))
    flat_index = flat_index.unsqueeze(-1).expand(-1, -1, nodes.size(2))
    gathered = torch.gather(nodes, 1, flat_index)
    # Restore the [B,N,K] layout; the trailing -1 recovers the channel axis.
    packed_shape = list(neighbor_idx.shape)[:3] + [-1]
    return gathered.view(packed_shape)
def gather_nodes_t(nodes, neighbor_idx):
    """Collect neighbor feature vectors for a single position.

    Features [B,N,C] at neighbor index [B,K] => neighbor features [B,K,C].
    """
    num_channels = nodes.size(2)
    # Tile each index over the channel axis: [B,K] => [B,K,C].
    index = neighbor_idx.unsqueeze(-1).expand(-1, -1, num_channels)
    return torch.gather(nodes, 1, index)
def cat_neighbors_nodes(h_nodes, h_neighbors, E_idx):
    """Append gathered node features to per-neighbor features.

    The node features are looked up at E_idx with gather_nodes and joined to
    h_neighbors along the last axis, neighbor features first.
    """
    gathered = gather_nodes(h_nodes, E_idx)
    return torch.cat([h_neighbors, gathered], -1)
class Normalize(nn.Module):
    """Normalization over one axis with a learned per-feature gain and bias."""

    def __init__(self, features, epsilon=1e-6):
        """Create gain (ones) and bias (zeros) vectors of length `features`."""
        super(Normalize, self).__init__()
        self.gain = nn.Parameter(torch.ones(features))
        self.bias = nn.Parameter(torch.zeros(features))
        self.epsilon = epsilon

    def forward(self, x, dim=-1):
        """Standardize `x` along `dim`, then apply gain and bias.

        epsilon is added both inside the square root and to the resulting
        standard deviation before dividing.
        """
        mean = x.mean(dim, keepdim=True)
        std = torch.sqrt(x.var(dim, keepdim=True) + self.epsilon)
        if dim == -1:
            # Last axis: the 1-D parameters broadcast as they are.
            scale, shift = self.gain, self.bias
        else:
            # Any other axis: reshape the parameters so they line up with `dim`.
            broadcast_shape = [1] * len(mean.size())
            broadcast_shape[dim] = self.gain.size()[0]
            scale = self.gain.view(broadcast_shape)
            shift = self.bias.view(broadcast_shape)
        return scale * (x - mean) / (std + self.epsilon) + shift
class TransformerLayer(nn.Module):
    """Neighbor-attention block followed by a position-wise feed-forward block.

    Each sub-block is wrapped as `norm(h + dropout(update))`.
    """

    def __init__(self, num_hidden, num_in, num_heads=4, dropout=0.1):
        """
        Args:
            num_hidden: Width of the node features.
            num_in: Width of the per-neighbor features given to the attention.
            num_heads: Number of attention heads.
            dropout: Dropout probability applied to both residual updates.
        """
        super(TransformerLayer, self).__init__()
        self.num_heads = num_heads
        self.num_hidden = num_hidden
        self.num_in = num_in
        self.dropout = nn.Dropout(dropout)
        self.norm = nn.ModuleList([Normalize(num_hidden) for _ in range(2)])
        self.attention = NeighborAttention(num_hidden, num_in, num_heads)
        self.dense = PositionWiseFeedForward(num_hidden, num_hidden * 4)

    def forward(self, h_V, h_E, mask_V=None, mask_attend=None):
        """ Parallel computation of full transformer layer

        Args:
            h_V: Node features.
            h_E: Per-neighbor features passed to the attention.
            mask_V: Optional node mask; multiplied into the output after a
                trailing axis is added.
            mask_attend: Optional attention mask forwarded to the attention.
        Returns:
            Updated node features.
        """
        # Self-attention
        dh = self.attention(h_V, h_E, mask_attend)
        h_V = self.norm[0](h_V + self.dropout(dh))
        # Position-wise feedforward
        dh = self.dense(h_V)
        h_V = self.norm[1](h_V + self.dropout(dh))
        if mask_V is not None:
            mask_V = mask_V.unsqueeze(-1)
            h_V = mask_V * h_V
        return h_V

    def step(self, t, h_V, h_E, mask_V=None, mask_attend=None, E_idx=None):
        """ Sequential computation of step t of a transformer layer

        Args:
            t: Position to update.
            h_V: Node features for all positions.
            h_E: Per-neighbor features for all positions.
            mask_V: Optional node mask; only column t is used.
            mask_attend: Optional attention mask forwarded to the attention.
            E_idx: Neighbor indices required by `NeighborAttention.step`.
        Returns:
            Updated node features for position t.
        Raises:
            ValueError: If `E_idx` is not supplied.
        """
        if E_idx is None:
            # Previously mask_attend was passed positionally into the E_idx
            # slot of NeighborAttention.step, so the call could never succeed.
            raise ValueError(
                "TransformerLayer.step requires E_idx (neighbor indices)"
            )
        # Self-attention
        h_V_t = h_V[:, t, :]
        dh_t = self.attention.step(t, h_V, h_E, E_idx, mask_attend=mask_attend)
        h_V_t = self.norm[0](h_V_t + self.dropout(dh_t))
        # Position-wise feedforward
        dh_t = self.dense(h_V_t)
        h_V_t = self.norm[1](h_V_t + self.dropout(dh_t))
        if mask_V is not None:
            mask_V_t = mask_V[:, t].unsqueeze(-1)
            h_V_t = mask_V_t * h_V_t
        return h_V_t
class MPNNLayer(nn.Module):
    """Message-passing block followed by a position-wise feed-forward block.

    Messages come from a three-layer MLP over [node, neighbor] features and are
    summed over neighbors, then divided by `scale`.
    """

    def __init__(self, num_hidden, num_in, dropout=0.1, num_heads=None, scale=30):
        """
        Args:
            num_hidden: Width of the node features.
            num_in: Width of the per-neighbor features.
            dropout: Dropout probability applied to both residual updates.
            num_heads: Accepted but not used by this layer.
            scale: Divisor applied to the summed messages.
        """
        super(MPNNLayer, self).__init__()
        self.num_hidden = num_hidden
        self.num_in = num_in
        self.scale = scale
        self.dropout = nn.Dropout(dropout)
        self.norm = nn.ModuleList([Normalize(num_hidden) for _ in range(2)])
        self.W1 = nn.Linear(num_hidden + num_in, num_hidden, bias=True)
        self.W2 = nn.Linear(num_hidden, num_hidden, bias=True)
        self.W3 = nn.Linear(num_hidden, num_hidden, bias=True)
        self.dense = PositionWiseFeedForward(num_hidden, num_hidden * 4)

    def forward(self, h_V, h_E, mask_V=None, mask_attend=None):
        """ Parallel message-passing update of all nodes """
        # Tile each node's own features over its neighbor axis and prepend
        # them to the neighbor features.
        num_neighbors = h_E.size(-2)
        h_V_tiled = h_V.unsqueeze(-2).expand(-1, -1, num_neighbors, -1)
        h_EV = torch.cat([h_V_tiled, h_E], -1)
        # Three-layer message MLP.
        hidden = F.relu(self.W1(h_EV))
        hidden = F.relu(self.W2(hidden))
        h_message = self.W3(hidden)
        if mask_attend is not None:
            h_message = mask_attend.unsqueeze(-1) * h_message
        # Aggregate over neighbors.
        dh = torch.sum(h_message, -2) / self.scale
        h_V = self.norm[0](h_V + self.dropout(dh))
        # Position-wise feedforward
        dh = self.dense(h_V)
        h_V = self.norm[1](h_V + self.dropout(dh))
        if mask_V is None:
            return h_V
        return mask_V.unsqueeze(-1) * h_V
class PositionWiseFeedForward(nn.Module):
    """Two linear layers with a ReLU in between: num_hidden -> num_ff -> num_hidden."""

    def __init__(self, num_hidden, num_ff):
        """Create the expanding (W_in) and projecting (W_out) linear layers."""
        super(PositionWiseFeedForward, self).__init__()
        self.W_in = nn.Linear(num_hidden, num_ff, bias=True)
        self.W_out = nn.Linear(num_ff, num_hidden, bias=True)

    def forward(self, h_V):
        """Apply W_in, ReLU, then W_out to the last axis of `h_V`."""
        expanded = self.W_in(h_V)
        return self.W_out(F.relu(expanded))
class NeighborAttention(nn.Module):
    """Multi-head attention from each node over its K neighbors."""

    def __init__(self, num_hidden, num_in, num_heads=4):
        """
        Args:
            num_hidden: Width of node features and of the attention output.
            num_in: Width of the per-neighbor features fed to keys and values.
            num_heads: Number of attention heads; the per-head width is
                num_hidden // num_heads.
        """
        super(NeighborAttention, self).__init__()
        self.num_heads = num_heads
        self.num_hidden = num_hidden
        # Self-attention layers: {queries, keys, values, output}
        self.W_Q = nn.Linear(num_hidden, num_hidden, bias=False)
        self.W_K = nn.Linear(num_in, num_hidden, bias=False)
        self.W_V = nn.Linear(num_in, num_hidden, bias=False)
        self.W_O = nn.Linear(num_hidden, num_hidden, bias=False)

    def _masked_softmax(self, attend_logits, mask_attend, dim=-1):
        """ Numerically stable masked softmax

        Entries whose mask is not > 0 are set to the most negative finite
        value of the logits' dtype before the softmax and zeroed afterwards,
        so a fully masked row yields zeros rather than NaN.
        """
        # Take the fill value from the logits' own dtype; masked_fill keeps
        # the result on the logits' device.
        negative_inf = torch.finfo(attend_logits.dtype).min
        attend_logits = attend_logits.masked_fill(~(mask_attend > 0), negative_inf)
        attend = F.softmax(attend_logits, dim)
        attend = mask_attend * attend
        return attend

    def forward(self, h_V, h_E, mask_attend=None):
        """ Self-attention, graph-structured O(Nk)
        Args:
            h_V: Node features [N_batch, N_nodes, N_hidden]
            h_E: Neighbor features [N_batch, N_nodes, K, N_in]
            mask_attend: Mask for attention [N_batch, N_nodes, K]
        Returns:
            h_V: Node update [N_batch, N_nodes, N_hidden]
        """
        # Queries, Keys, Values
        n_batch, n_nodes, n_neighbors = h_E.shape[:3]
        n_heads = self.num_heads
        d = self.num_hidden // n_heads
        Q = self.W_Q(h_V).view([n_batch, n_nodes, 1, n_heads, 1, d])
        K = self.W_K(h_E).view([n_batch, n_nodes, n_neighbors, n_heads, d, 1])
        V = self.W_V(h_E).view([n_batch, n_nodes, n_neighbors, n_heads, d])
        # Attention with scaled inner product => [N_batch, N_nodes, N_heads, K]
        attend_logits = torch.matmul(Q, K).view(
            [n_batch, n_nodes, n_neighbors, n_heads]
        ).transpose(-2, -1)
        attend_logits = attend_logits / np.sqrt(d)
        if mask_attend is not None:
            # Masked softmax; the mask is shared by all heads.
            mask = mask_attend.unsqueeze(2).expand(-1, -1, n_heads, -1)
            attend = self._masked_softmax(attend_logits, mask)
        else:
            attend = F.softmax(attend_logits, -1)
        # Attentive reduction, then merge the heads and project.
        h_V_update = torch.matmul(attend.unsqueeze(-2), V.transpose(2, 3))
        h_V_update = h_V_update.view([n_batch, n_nodes, self.num_hidden])
        h_V_update = self.W_O(h_V_update)
        return h_V_update

    def step(self, t, h_V, h_E, E_idx, mask_attend=None):
        """ Self-attention for a specific time step t
        Args:
            t: Position to update.
            h_V: Node features [N_batch, N_nodes, N_hidden]
            h_E: Neighbor features [N_batch, N_nodes, K, C]. They are
                concatenated with the gathered neighbor node features before
                the key/value projections, so C + N_hidden must equal the
                layer's num_in.
            E_idx: Neighbor indices [N_batch, N_nodes, K]
            mask_attend: Mask for attention [N_batch, N_nodes, K]
        Returns:
            h_V_t: Node update [N_batch, N_hidden]
        """
        # Dimensions
        n_batch, n_nodes, n_neighbors = h_E.shape[:3]
        n_heads = self.num_heads
        # Integer division: view() needs an int size, and `/` gives a float
        # under Python 3.
        d = self.num_hidden // n_heads
        # Per time-step tensors
        h_V_t = h_V[:, t, :]
        h_E_t = h_E[:, t, :, :]
        E_idx_t = E_idx[:, t, :]
        # Single time-step
        h_V_neighbors_t = gather_nodes_t(h_V, E_idx_t)
        E_t = torch.cat([h_E_t, h_V_neighbors_t], -1)
        # Queries, Keys, Values
        Q = self.W_Q(h_V_t).view([n_batch, 1, n_heads, 1, d])
        K = self.W_K(E_t).view([n_batch, n_neighbors, n_heads, d, 1])
        V = self.W_V(E_t).view([n_batch, n_neighbors, n_heads, d])
        # Attention with scaled inner product => [N_batch, N_heads, K]
        attend_logits = torch.matmul(Q, K).view(
            [n_batch, n_neighbors, n_heads]
        ).transpose(-2, -1)
        attend_logits = attend_logits / np.sqrt(d)
        if mask_attend is not None:
            # Masked softmax
            # [N_batch, K] => [N_batch, N_heads, K]
            mask_t = mask_attend[:, t, :].unsqueeze(1).expand(-1, n_heads, -1)
            attend = self._masked_softmax(attend_logits, mask_t)
        else:
            # The logits are already scaled above; do not scale them again.
            attend = F.softmax(attend_logits, -1)
        # Attentive reduction => [N_batch, N_heads, 1, d]
        h_V_t_update = torch.matmul(attend.unsqueeze(-2), V.transpose(1, 2))
        # Merge the heads and project, matching what forward() does.
        h_V_t_update = h_V_t_update.view([n_batch, self.num_hidden])
        h_V_t_update = self.W_O(h_V_t_update)
        return h_V_t_update
class Struct2Seq_decoder(nn.Module):
    """Graph-conditioned autoregressive sequence decoder."""

    def __init__(self, num_letters, node_features, edge_features,
                 hidden_dim, num_encoder_layers=3, num_decoder_layers=3,
                 vocab=20, k_neighbors=30, protein_features='full', augment_eps=0.,
                 dropout=0.1, forward_attention_decoder=True, use_mpnn=False):
        """ Graph labeling network

        Args:
            num_letters: Size of the output distribution.
            node_features: Width of the input node features V.
            edge_features: Width of the input edge features E.
            hidden_dim: Width of all hidden representations.
            num_encoder_layers: Number of encoder layers to construct.
            num_decoder_layers: Number of decoder layers.
            vocab: Number of rows in the sequence embedding table.
            k_neighbors, protein_features, augment_eps: Accepted but not
                used by this class.
            dropout: Dropout probability for every layer.
            forward_attention_decoder: If True, positions outside the
                autoregressive mask still contribute structure-only features.
            use_mpnn: Use MPNNLayer instead of TransformerLayer.
        """
        super(Struct2Seq_decoder, self).__init__()
        # Hyperparameters
        self.node_features = node_features
        self.edge_features = edge_features
        self.hidden_dim = hidden_dim
        # Embedding layers
        self.W_v = nn.Linear(node_features, hidden_dim, bias=True)
        self.W_e = nn.Linear(edge_features, hidden_dim, bias=True)
        self.W_s = nn.Embedding(vocab, hidden_dim)
        layer = TransformerLayer if not use_mpnn else MPNNLayer
        # Encoder layers (constructed, but forward() does not apply them)
        self.encoder_layers = nn.ModuleList([
            layer(hidden_dim, hidden_dim*2, dropout=dropout)
            for _ in range(num_encoder_layers)
        ])
        # Decoder layers
        self.forward_attention_decoder = forward_attention_decoder
        self.decoder_layers = nn.ModuleList([
            layer(hidden_dim, hidden_dim*3, dropout=dropout)
            for _ in range(num_decoder_layers)
        ])
        self.W_out = nn.Linear(hidden_dim, num_letters, bias=True)
        # Initialization
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def _autoregressive_mask(self, E_idx):
        """Return a float32 mask that is 1 where a neighbor index is smaller
        than the index of the node it belongs to, and 0 elsewhere.

        E_idx is [N_batch, N_nodes, K]; the result has the same shape.
        """
        N_nodes = E_idx.size(1)
        # Build the positions on E_idx's device so the comparison also works
        # when the indices are not on the default device.
        ii = torch.arange(N_nodes, device=E_idx.device).view((1, -1, 1))
        mask = E_idx - ii < 0
        return mask.type(torch.float32)

    def forward(self, V, E, E_idx, S, L, mask):
        """ Graph-conditioned sequence model

        Args:
            V: Node features, or None to decode without structure.
            E: Edge features (ignored when V is None).
            E_idx: Neighbor indices [N_batch, N_nodes, K] (replaced when V
                is None).
            S: Sequence token indices [N_batch, N_nodes].
            L: Not used by this method.
            mask: Node mask, reshaped to [N_batch, N_nodes, 1, 1].
        Returns:
            Log-probabilities over the output letters for every position.
        """
        # Prepare node and edge embeddings
        if V is None:
            # No structure: zero node/edge embeddings and a single neighbor
            # per node, its predecessor (node 0 points at node 1). Sizes come
            # from S and hidden_dim instead of the former fixed 128 / 140.
            n_batch, n_nodes = S.shape[0], S.shape[1]
            h_V = torch.zeros(n_batch, n_nodes, self.hidden_dim,
                              dtype=torch.float32, device=S.device)
            h_E = torch.zeros(n_batch, n_nodes, 1, self.hidden_dim,
                              dtype=torch.float32, device=S.device)
            chain = torch.arange(n_nodes, device=S.device) - 1
            # Slice assignment keeps this safe for an empty sequence; the
            # min() keeps the index in range for a single-node sequence.
            chain[:1] = min(1, n_nodes - 1)
            E_idx = chain.view(1, n_nodes, 1).expand(n_batch, -1, -1).contiguous()
        else:
            h_V = self.W_v(V)
            h_E = self.W_e(E)
        # The encoder layers are skipped here; decoding starts directly from
        # the embedded features.
        # Concatenate sequence embeddings for autoregressive decoder
        h_S = self.W_s(S)
        h_ES = cat_neighbors_nodes(h_S, h_E, E_idx)
        # Build encoder embeddings (sequence part zeroed out)
        h_ES_encoder = cat_neighbors_nodes(torch.zeros_like(h_S), h_E, E_idx)
        h_ESV_encoder = cat_neighbors_nodes(h_V, h_ES_encoder, E_idx)
        # Decoder uses masked self-attention
        #   mask_attend : autoregressive mask
        #   mask        : input mask, to mask the nodes/edges with nan values
        #   mask_bw     : applies both masks together
        mask_attend = self._autoregressive_mask(E_idx).unsqueeze(-1)
        mask_1D = mask.view([mask.size(0), mask.size(1), 1, 1])
        mask_bw = mask_1D * mask_attend
        if self.forward_attention_decoder:
            # mask_fw : complement of the autoregressive mask (times mask)
            mask_fw = mask_1D * (1. - mask_attend)
            h_ESV_encoder_fw = mask_fw * h_ESV_encoder
        else:
            h_ESV_encoder_fw = 0
        for layer in self.decoder_layers:
            # Neighbors inside the autoregressive mask carry sequence
            # information; the others carry the structure-only features.
            h_ESV = cat_neighbors_nodes(h_V, h_ES, E_idx)
            h_ESV = mask_bw * h_ESV + h_ESV_encoder_fw
            h_V = layer(h_V, h_ESV, mask_V=mask)
        logits = self.W_out(h_V)
        log_probs = F.log_softmax(logits, dim=-1)
        return log_probs
############## OUR UTIL FUNCTIONS #################
# get node features
def get_node_features(omega, theta, phi):
    """Build per-node features from the first off-diagonals of three matrices.

    Five columns are taken (omega above the diagonal; theta and phi above and
    below it), each zero-padded to full length, and returned as
    [sin(columns), cos(columns)] with 10 columns in total.
    """
    def _pad_front(diag):
        # new_zeros keeps the pad on the same dtype and device as the data.
        return torch.cat((diag.new_zeros(1), diag))

    def _pad_back(diag):
        return torch.cat((diag, diag.new_zeros(1)))

    # omega is symmetric
    n1 = _pad_front(torch.diagonal(omega, offset=1))
    # theta is asymmetric
    n2 = _pad_back(torch.diagonal(theta, offset=1))
    n3 = _pad_front(torch.diagonal(theta, offset=-1))
    # phi is asymmetric
    n4 = _pad_back(torch.diagonal(phi, offset=1))
    n5 = _pad_front(torch.diagonal(phi, offset=-1))
    ns = torch.stack([n1, n2, n3, n4, n5], dim=1)
    return torch.cat([torch.sin(ns), torch.cos(ns)], dim=1)
def get_k_neighbors_idx(array, k):
    """Return the indices of the k smallest entries of `array`, smallest first."""
    _, indices = torch.topk(array, k, largest=False)
    return indices
def get_k_neighbors(dist, k):
    """Return, for every row of `dist`, the indices of its k smallest entries.

    The result has one row per row of `dist`, ordered smallest first.
    A single batched top-k over the last axis replaces the former per-row
    Python loop.

    NOTE(review): if `dist` is a distance matrix with a zero diagonal, each
    row's first neighbor is the row itself - confirm this is intended.
    """
    return torch.topk(dist, k, dim=-1, largest=False)[1]
def get_edge_features(dist, omega, theta, phi, E_idx):
    """Look up pairwise features for every (node, neighbor) pair.

    For node i and its k-th neighbor j = E_idx[i, k], the six features are
    dist[i, j], omega[i, j], theta[i, j], theta[j, i], phi[i, j], phi[j, i],
    stacked along a new axis 2.

    Args:
        dist, omega, theta, phi: Pairwise matrices indexed [i, j].
        E_idx: Neighbor indices, one row per node.
    Returns:
        Tensor of shape [len(E_idx), K, 6].
    """
    if not torch.is_tensor(E_idx):
        # A sequence of per-node index tensors is also accepted.
        E_idx = torch.stack(list(E_idx))
    # Column of row numbers; it broadcasts against E_idx so that a single
    # indexing operation visits every (i, E_idx[i, k]) pair.
    rows = torch.arange(E_idx.size(0), device=E_idx.device).unsqueeze(-1)
    features = [
        dist[rows, E_idx],
        omega[rows, E_idx],
        theta[rows, E_idx],
        theta[E_idx, rows],  # reverse direction: neighbor -> node
        phi[rows, E_idx],
        phi[E_idx, rows],    # reverse direction: neighbor -> node
    ]
    return torch.stack(features, dim=2)
def get_mask(E):
    """Return a float32 tensor with 1.0 for every leading-axis entry of `E`
    whose sum over axes 1 and 2 is finite, and 0.0 otherwise."""
    totals = np.sum(np.array(E), (1, 2))
    is_valid = np.isfinite(totals)
    return torch.tensor(is_valid.astype(np.float32))
def replace_nan(E):
    """Overwrite every NaN entry of `E` with 0 in place and return `E`."""
    E[np.isnan(E)] = 0.
    return E
def get_S_enc(seq, tokenizer):
    """Tokenize `seq` with `tokenizer.tokenize` and wrap the result in a tensor."""
    tokens = tokenizer.tokenize(seq)
    return torch.tensor(tokens)

Просмотреть файл

@ -0,0 +1,386 @@
>seq1
SISTRIGEYRSAQSKEDLIQKYLNQLPGSLCVFFKFLPSVRSFVATHASGIPGSDIQGVGVQLESNDMKELSSQMAIGLLPPRFTEMLVEAFHFSPPKALPLYAHNALEGVFVYSGQLPAEEVARMNEEFTLLSLCYSHF
>seq2
AFPMRIADYRSAQSKEDMIQRFLNGISGSRCLFFKFLPSVRSFVATHANGIDAAQIQGVGSQLTSDEMKDLGSLLAMGLIPEKFSSMLVEAFHLNPPKAIPVYANAHLEGLFVYSGGMDKKAIEQLNDEYTLFNLCYSHF
>seq3
AISEVINDYRIAESKEDIIRMLFQNLSNLPLLFFKFLPSMNSFVMSHASMPNHQVYEGLGSALNPEETKDLVKQILLNIVPASFNQVISNMFMFQRPSMIPLFDRENLEGVFVFDQESANSLIDQMQDYVSATSLYYSLY
>seq4
HVSMRITDYRSAQSKEDLIQKYLNHLPNALCIFFKFLPSVRSFVATHAQGIPASDIQGVGVQLETGDTKDLTTQMAMGLLPARFNDMLVEAFHFNPPKALPLYAHHALEGVFVYSGNITPAEASQLAEEFSLLSLCYSTF
>seq5
FISPRIGDYRSAQSKEDMVQKYLNNLPQMVCLFFKFLPTVRSFVATHATGIPASDIQGVGVQLDSADMKELNSQMAMGLLPSRFMDMLTEAFHFRPPKALPLYAHNNLEGVFVYSGEVNKQLSTAMTEEFSLMSLCYSNL
>seq6
KVSMRITDYRSAQSKEDLIQKYLNHLPNSLCIFFKFLPSVRSFVATHAQGIPASDIQGVGVQLESSDVKDLTTQMAMGLLPARFNEMLVEAFHLSPPKALPLYAHHALEGVFVYSGSISAAESAQLAEEFSLLSICYSAF
>seq7
SIGQKIQLYSAATTKDEVLIHFINQLSC-QAIYFKYLPTVQSFVATHSHGLDIEAIKGVGTRLEATETAHLLDLLKSGEIPPALAELMKEGLRIGqyfpKPVIVQNSTLTGLDGLFIFWGAEGF-HFQQIESDFLIFSLLYQQA
>seq8
AISSRIATYKVAESKEDMLHKFLQHIPKeTLCIYFKYLPSVRSFVATHGWGIPNSDIQGVGVQLEAEDMRTLSERVTMGQLPERFSKMLKEAFGFNPPKALPLYAYHSLEGVFVFSGSLDAKYVAEINEEFTLLSLCYSNF
>seq9
PFQTRIAEYRAAESKEELVQMFFRQTASQSWAFLKYVSSIQTYISVSSQNMPDEWVEGLSYKI-PSSQSDFNDKILLGEYPESFLQYIKAKWGVDTLKVMPLLLKNEIEGLLITPQDI----SAEVAEDFSLMSLVYNLI
>seq10
PFQMRIAEYKMSESKEELLQKFYQQSPKQSWVFLKYIKSIQTYISVSHQNMEPSWVEGLSFKI-PTDVQEFNSRVFVGDFPDSLISYIKTKWDVSNLKILPLTFKDEIEGLLISTQDI----SADVAEDFSLMSLVYQLM
>seq11
-VSMRITDYRTAQSKEDLIQKYLNHLPGnTVCVFFKFLPSVRSFVATHALGVPASDIQGVGVQLESSDMKDLSAQMAMGLLPPRFTEMLVEGFHFNPPKALPLYAHNVLEGIFVYSGSLPAEDTAVMNEEFTLMSLCYANF
>seq12
-FSYKIAEYKTAQSKEDLLQKFLNNLEHTLCVYFKFLPSVRSFVATHANGIPAASIQGVGCQLESDDMKELGSQLALGLLPERFSSMLVEAFHFNPPKALPLYANNILEGVFVYSGLLGGTAAEMMGEEFSLFSLCYSHF
>seq13
-VSMRIADYRSGQSKEDLIQRFLNNAPESLIVYFKFLPSVRSFVATHAKGAEGSSIQGVGCQIEMSEARDLGSQMAMGEIPPTFVSMLKEAFHFNPPKALPIYSHSGLEGVLIYSGDMDKLMAHKLQEEFSIFSLCYSF-
>seq14
-VSVRLADYRTVQSKEDLIQRYLNHLPTqVGSVFFKYLPSVRSFVATHGAGIPAGDIQGVGVQLESSDVKDLPSQLALGMLPDRFANMLVEAFQFNPPKVLPLYAHNNLEGVFVYSGTTSPADITFLSEEFALMSLCYVN-
>seq15
--SMRIADYRSGQSKEDLIQKFLNNAPESLIIYFKFLPSVRSFVATHGKGIEGSSIQGVGCQIEMQEARDLGGHLSMGQIPASFVSMLKEAFRFNPPKALPVYSHAGIEGVVVYSGDIDKLVLHKLQEEFAVFSLCYSY-
>seq16
-FQMRITDYKLAESKEELLQKFFSQTPTQSWVFLKYIKSIRTYIAVAHQNMEENWVEGLSFKI-PGDEEKFNQQIMIGNFPESLTDYLKGKWDIQALKVVPLILKDQIEGLLVTPQDI----SAEVAEDFSLMSLVYQV-
>seq17
-FQMRISEYRTAESKEDLLQTFFKQTPTQAWVFLKFIKSIQTYISVLHQNMPESWVEGLSFKI-PIQETQFNQKIMVSDFPSSFLNYIKSKWNVEHVKILPLIIKNELEGLLVSTQDIDG----NVAEDFSLMSLVYA--
>seq18
----LIGDYRLSNSKEDIVQKFISSLQKTSCVYFKFLPSVRSFVATHSNGIDARLLKGVGSQLGKEESKNLTSTLSLGEVPPSMKDLLTEGFHFQSPKILPLFIQNQLDGLIAYDGKIDRNELQDFHERFSLFSLVYSHY
>seq19
-VSNRVADYRSAQSKEEVLQKYFERLGKIPAVYLKFLPSVRSFVATHASGFPPSHIQGVGCQLENNDLESLNTQITVGLLPPLMNELLQKVFHFASPRVVPLFVQNQLEGAVVYNGNLSKAESLRVGEEFSLFALCYSY-
>seq20
-FQTRIAEYRSADSKEDLLQKFFSQTPQQSWAFLKYVKSFNSYILVSSQNMPEDWIQGVSFKI-PNSEPDFNKKVTVGEFPASFLNYLKRKWEVEIIKVLPLLLKDDVEGLLVTTQDI----SPEVAEDFSLASLMYNL-
>seq21
-FQMRISEYRAAETKEELIQLFFKQTPQQSWAFLKYAPSIQTYISILSQSMPDSWVEGLSYKV-PSAMKDFNQQIMLGEYPAVLTEYICAKWGVKTVKMMPLILKDEIEGLLVTPQDI----SAEVAEDFSLMSLVYNL-
>seq22
-ISEVLNDFRIAESKEDIIRMLFQNLTNMPLLFFKFLPSMNSFVMSHASMPNHQVHEGLGSALNPEETKDLIKQILLNIVPASFNQVVANMFAFQRPSLIPLFDRDSLEGVFVFDQESSHTLIDQMQDYVSATSLYYSL-
>seq23
-FQMRIAEYRVAESKEELIQKFFKQTPAQAWVFLKFVKPIQTYISVSHQNMPEAWVAGLSYKI-PINQPDFNEHVIIGSYSESFLKYIKTKWSVDNVKIFPLIFKNEIEGLFVSPQDI----SAEAAEDFSLMSLVYSL-
>seq24
-FQSRIAEYRSADSKEELLQKFFSQTPQQSWAFLKYVKSISSYILVTSQNMPETWIQGLSYKV-PATDAEFNKKIMLGEFPDNLLNYLKRKWEVDIVKILPLLLKDDVEGLLVTTQDI----SSEVAEDFSLASLMYNL-
>seq25
-FQMRINEYKSADSKEDLLQIFFRQTPLHSWVFMKYVPSISTYIAVANQNMPQSWIEGLSFKVSALE-TEFNNKVAVGDFPLSLTYYLKSKLEVETVKILPLVIKNDVEGILITTQDI----PAEVAEDFSLMSLNYAL-
>seq26
----------------------------SMCIFFKFLPSVRSFVATHGNGVEGSQIQGVGCQLEADDTKDLNSQLSVGLLPARFNAMLVEAFHFNPPKGLPLYGAQNLEGVFVYSGSLDKNAAAQLNEEFALFSLCYSH-
>seq27
----RITDYKSADSKESLVQKYLQRSESTPMVFFKFLTSVRSFVVTQSVHMDVERIQGLGAQLSPGELKDLNAQLSVGLLPPSLLKMVNEAVHLQEFRVWPLYVHHQLEGVVICQVGE---ETTDLHEEFSLFGVMYSHF
>seq28
-----IKDYRSAASKEELLRRFVQVAGKTACVFLKYLPTVRSLVVTNASVFDLDHLQGLGCQLQPNEAKDFGSQVALGIVPPSLHDLLRQAFQFQKSRLLPLFIQDRLEGVVAYSTQIAPSEKMRLDDEFALMSLAYTA-
>seq29
--QLRIAEYKSATSKEDLLNVFYNQTPTQSWIYLKFVPSIETFICVSYSQVPEDWVEGLSYKVATKDRDNFMSKLFSGALPPNLGNYLKNKFGTDRIKFLPMIIRDKIEGILISTQEI----SAEVAEDFSLMSLVYT--
>seq30
-ISAKIAFYKACVNKDEVVLAFFRRLSC-KAIFFKYLPTVNSFVALSAQGVDVEDIKGVGSRLEPSESKDLPKQLSEGVLPQALVQILKEGLHVSRFEWKPLLVQNWVEGVLVFWGDEKF-QFASIENEFLIFDLIYQKM
>seq31
--QIRISEYKSCQSKEELLDVFYKQAETQSWVYLKFIPTIETFISVSNHQVPEYWVEGLSYKV-PASNKGFMDQIFQGVLPESFERYLIQKFNVKQIKFIPLIIKNQVEGLMISTQDI----TADSAEDFSLMSLVYTN-
>seq32
--QLRIADYKSASSKEDLLNTFYSQTPAQSWVYLKFVPSIETFLCVSYAGVPEEWVDGLSYKV-NSKDKDFMSQLLMGSLPLSIATYFKSKFGTDHVKYLPMIIRDKIEGILISTQEI----SAEVAEDFSLMSLVYTL-
>seq33
--QSRISTYRLAESKEQLLDQFYQATPTQTWIYLKLVPTIQTLICVSSANCPEDWSEGLSYKI-PTREKNFSDLLISGQMPEGLLSYLKVKLAVDKIKFLPLIIKQAVEGVLVSTQDI----SAEVAEDFSLMSLVYSN-
>seq34
--QTRIALFKGAESKENLLDLFYQQTPEQSWVYLKFAQTIQTFICVSYANVPENWIEGLSFKV-PIKEKNFLEQISLGALPESLSNYLTQKFGVERVKFLPLMLRDSLDGILISPQDI----SAEVAEDFSLMSLIYTN-
>seq35
--QNRIAIYRLAESKEQLLDQFYQATPTQTWMYLKLVPTIQTLICLSSANSPASWTEGLSYKI-SNKETSFLEQILNGLLPENLHSYLKNKFDIDKVKFLPLIIKQNIEGLLISTQDIDA----NVAEDFSLMSLVYTN-
>seq36
--------------------------------------SLRSFVATHGNGILSSEIKGVGVQLESEDLKDLASNLAMGLLPARFNEMLVAAFQFNPPKALPLYAHNSLEGVFVYAGNMSATEIKNISEEFSLMSLCYSNF
>seq37
--QSRISTYRLAESKEQLLDQFYQATPTQAWFYLKLVPTIQTLICVSSANCPEEWSEGLSYKI-PAREKGFSEQLLSGIIPEGLLSYLKVKLGVDKIKFLPLIIKQSVEGLLISTQDI----SAEVAEDFSLMSLVYSN-
>seq38
-------------------------LEKIPAVYFKFLPSVRSFVATHASGFNANQIQGVGCQLENQDLEGINTQVSVGLLPPLMLEMFRKVFHLENPKVLPLFVQNAMEGIVVYSRELSKADSFRVGEEFSLFSLCYS--
>seq39
--QTRIADYKLAESKEELLNTFYRNTPDQTWIYLKYIESIQTFMGISSHLAPESWVEGLSFKI-PRGQSEFNQTIARGGLPKDFLNYLTEKFDTARIKTLPILLRDKVDGLLVTTQEIPI----EVSEDFSLFSVIYA--
>seq40
--AEKMRVYQGAQSKDDYLAVFLQHLPC-HAIYFKFLLTVNSFVATASQKLEIESIKGVGSRLTADEVKSLVEDLEAGRLPASLKELMNEGLKVPRYYSQPVPVHRGLDGLLVFWGDAEF-QYQDIENDFLIFHMLYQQ-
>seq41
-FQTRIAEYRSAESKEDLLQKFFAQTPQQSWAFLKYVKSINSYILVSSQNMPESWIQGVSYKI-QNAEADFNKKVIVGEFSQNFLNYLKRKWEV----------------------------------------------
>seq42
--TSHVLKYQQASNQEELLQTFMNSLGAIHSIYFKFLPTVNSFVATLSHGIDIESVKGAGSRLTDAESKDVHEFLASGQIPEALRALMQEGLKIVQFISQPVLLYRSLDGLFIFWSSENF-NFSLIENQFQIFQLVYQN-
>seq43
------------------------QAPTQSWAFIKYVKTIQTYVSMSSQNMPGDWVEGLSFKI-PTDQSDFNDKLIVGSYSDSFLDYIKNKWGVKTVKILPLLHKNEVEGLLVTPQDV----SAEIAEDFSLMSIVYSL-
>seq44
--KELVQKYKATDSKEDLLGQYLGKLKF-QALFMKYLPTVQSFVATYSQGIDLDSIRGVGARLNTEESARLNELTQNEYLPPSLREVIEKGLGVRQFAVKSVTGATGLDGVFVFWSDHNF-DFSQHEDEFLIFQLLYQN-
>seq45
----FISDLRAAQSKEEMISTMLRESKETPLVYLRYLPSMASFLVTDSSYANVENFKGLGCRLTAEENKDLAKQLELAIVPPSLAELLMKAFRMNSPRIRSLMSGSVLEGILVGDASS-AESTHYLNERFAIMSLVYSHF
>seq46
--SEEFKKYRNATSKEDVLQIFLKEINQtflaknnkLSSLYFKFLPSVQSFVATQSIGVDIDSVKGVGGKFTEQDSKDPLELIKNGHVPSMIQELMKEGFSTEDFIFKPVFFDQMLDGFFIFWSNTQKVYSEEFENYFTLFLLFYE--
>seq47
-VREKINAYQKALAKDDFVQTFLEQLPC-NAIYFKWLPSVVSFVATCSKGLDIESLKGVGSRMTLEESRSVNEFLESGKLPEALNELMQAGLNIKNYFSQLIPVYHGFDGLIVFWGDENF-HFEQIENDFLVFRMLYQQ-
>seq48
----FISDLRAAHSKEELIATLLREARGTPLVYLRYLPTMGSFLVTDTSYEPPEEFKNLGCRLAPEENKDLVRQLELALVPPSLGELLSQAFRIQNPRVRPLLNGAELEGILVGDANS-PEITHELNERFAIMSLVYSHF
>seq49
-VSHRVADYRSSQSKEEVLQKYFERLDKVQAVYFKYLSSVRSFVATHAAGFTSQQIQGVGCQLEGADLESLNTQIAVGLLPPIVMEMLQKVFNIQT--------------------------------------------
>seq50
----------------DLVQKFVSSLQKTSCVYFKYLPSVRSFVATHSSGIDARLLKGVGSQLGAEESKNLNTSLGMGEIPKSLKELLGEGFHFQQPRVLPLFVQSQLDG------------------------------
>seq51
--TEKANAYEDLTTKDEYLSAFLLRIPC-RAIYFKFLPSVNSFVAVSGHGIDIESIKGIGARMSPDESKDTLQFLQKGALPESLREIIVEGLKISQYLVKPVPLYRSLDGLFVFWGDEGF-DYREIENEFVLFHLFLQ--
>seq52
-FQTRIAEYRSADSKEDLLQKFFKQTPSQSWAFLKFVKSINSYILVSSQQMPEEWVQGLSYKI-PNSEKDFNDRMMIGEFSDGFLTYIK---------------------------------------------------
>seq53
-IKDFIMDLQAAESKEELLQVFLRESANLSLVYLKYLPSMSSFVVTHTAHLPLEKTEGLGCRLSPEENKDLLKQFSLGLVAPSLMELLEKAFQIKSPRVRPLFDRQALD-------------------------------
>seq54
-FQMRIADYRVAESKEELIQIFFKQTPTQSWAFLKFVKPIQTYISVLQQNMPEAWVEGLSYKI-PVNQTDFNDNVVIGSYPEAFLKYIKNKWD-----------------------------------------------
>seq55
--QLRIAEYKSCESKEELLDIFYKQTDQQSWVYLKYIPTIETFISVSQHEVPDNWVEGLSFKV-PAQDKDFLSQIFQGNLPPALENYLLGKFKVPQIKFIPL--------------------------------------
>seq56
--QLRISDYKLAESKEELLDLFYKNTPEQSWVYLKFIESIQTFMGISSHLIPDSWMEGLSFKV-SKNQKDFMNLVRQGELPKNFLQYLSQKFDTAHLKFLPVLLRDKVDGV-----------------------------
>seq57
----LIQKFSRASSKEEFVDVYFKYVSErksgapFSAVYFKYLPSVVSFIATQGHQINLERTKGIGLKLMGDEAGHLAEQLQNQNLPMGFQNLLQEAFQIQDWTVYPLFLKDQVEGVVIFWGLE---LTPGDWEEFLLFQLCYQN-
>seq58
--EEKIQLYRYAVTKDDYLEAFLRQIPG-RAIYFKFLPTVGSFVSTFAQGLNLDDLKDVGVRLTFEESKDVDTFFKEGGTPVALKELLAEGLHVDGYMTKPVFSINTLEGIFVFWNFNAA-Q------------------
>seq59
-----------CQSKDDYVLAFLKQLSC-NAIFFKFLPTVSSFVATAAQGLDIETMKGVGSRMSLDESKDVKAFLSSGQVPQALNELMTDGMKVPQFYSHLVSVPQGPEGLLVFWDGQ-RSGPPAIEDDFLIFQLLYQQ-
>seq60
--------------------------------FLKYVKSINSYISVSSQNMPESWVEGLSYKV-PNNETDFNQNMLVGIYSEHFLNYLKRKWSVDIVKVLPLTLKDQIEGLLVTPQDIKG----EVAEDFSLVSLVYS--
>seq61
-FNDEIKKYRKAANKEEVIADFLKDLNQkflsrnqrLFAIFFKYLPSVYSFVSLQSLGLDVESLKGVGFRLTPEEARQPAELFAEGKMPSQLTQLLQEGLKVPSPLVCPVLVQGKPEGYFCFWTNSGGLSLESIANELSLFQVLFE--
>seq62
--------------------------------FLKFVKSINSYILVSSQNMPEGWIQGVSFKI-PGSEPDFNNKVIIGEFSPSFLNYLKRKWEVDIVKVLPLVLKDEVEGLLVTTQDI----SPEVAEDFSLASLMYNL-
>seq63
-VAERIRNYLSVESKEDLLSRWMLGLGEKPCAYLQYLPSVRSLVVTHGTL-PES--QGVGCQLTPAEAQDFATQVALGVVPPTLDDLLKKAFKFSSVRLLPLFTQGKLEGVA----------------------------
>seq64
------------------------------CVYFKFLPTVMSFVATNGYHVDLERTKGIGAKMTPDETKTLLATILERKMPPSLHTLMAEAFRVSQSFLYPLLLKNQIEGVFVFWGVAESVFPRSFGAEFSLFQLCYQN-
>seq65
--GREISAYANAKTREEMTDVFLQQMQQkclrrnlnLRALILRFLPTVQSFVATQSLGLDIDKLRGIGARLEKDEAEKLTELARAGQHPKQLLQLLQQGLALDNFQVQPMILRDHVEAYFVFWVESGVLQKTDFENEFTTYSLLYY--
>seq66
-FQEKIRLFDAVESKDETIHTFFRALPC-RAIFFKFLPTVNSFVATMAKGLDIESVKGVGSRLKSDEIEKLQKILQSGALPSALEDLMTN--------------------------------------------------
>seq67
-------------SKEEMIWSFLSHLPC-RGIFFKFLPTVQSFVATQAHGLDIDXXXXXXSRLTQEEARDLDQLLQKKSLPKVLDELMKDGLRVPSYQSHVLRVQRHLEGLFVFWGLE-AEGAAQVENSLLIFQLAYENF
>seq68
-IQDRLNEYRSAESLEILVQRFLGQFQGRLVIYFKYIEAVRSLLALNAQGVEISKLAGLGLQVEVKDLQIFYDQLLLGLAPSKVISEVSELFGSTKLKSFPIYVFEKLQGFFITVDED----LEIFESDLSLVSLAYSH-
>seq69
-----------MQSKEELIRWVFSRIEArysqeskINGIFFKYLPTVSSLVATLTMGLGSDKAKGVGAKLTPEERKTLGSDLSGNLLPESLKNVLTSGFHVSEFSSFPVFVQNQVEGVFIFWPMQ-ISESDKI--ELNIFQLYY---
>seq70
---------------EGVVQTFLELSSElvekKPVLFLKYLPAHSALVAAQVAQMDPEKIKNVGFSVAQIDPKELNELFQHPEKIQQLTELMKVVFSQNEFSSLPFIYQNQVQGVFVLFGSFKSESDQKvFESYMQLMNVRYDN-
>seq71
--KNEVMVYAKAKSKEDILDLFLHHLEQkclrqnlkLNALVLKFLPTVQSFVATQSLGLDLEKVKGVGARLEKDEAQDLLGFLESGHFPKQLIHLLNQGLGVSNFVGKTLMLHDAIEALVVFWSSNGTLKAAH---------------
>seq72
----------------------------------KYLPTVKSLVVTHASATSAERLNGVGCQLAAEEARDFASQVALGMVPPSVSKLLSEAFAMNSASLWPLFLHRQLEGVVAFSREVDPRVMAALRDEFSLFSTLYAN-
>seq73
-----------SHTVEEATQDWLNKANLlfsgVRAAFFKYVPGHPYLMLTQCSGMELESVRGVGVNLTGLSPLEQKSFYSHTRFLMALKDLLKGAFNSDGFEYREIISDRGVLGIVVVFKTLEHEREKIfFNDSVEILNIVV---
>seq74
-ISNLLTLYENTRTREDLLQVFFHSLESyclegAKVLYFKYLEPVQLLVATHGCGVPVDDIKGAGIRLQPQEVLDAKNLLLSPRGFGSLNKLLTDIFHVDQCYVKPLIVREDIDGLFVFFGESLESfNSIRFSNRFSLFRVCFER-
>seq75
--GEELAKYGKATSKEEVLSLFFRELEAkfsrleipIKALFFKYLPTVQSFVAMQGLGLDLDSIRGVGGRLVNEESHDPEGFFAKGALPYELKMLLNELVGRDepqmQMKSLYISIRDQVDGLFVLWGSSPQIKWQVIQNEFAFFNLLYE--
>seq76
-----LSSFHNATNSDELLQIMVNFISQSKVIFLKYFEGIQSFVGFQSNFGSAEEIQSIGCQLKPPESNELAKQMSLGIVPVTLKELSQKVFAFSQPQFWPLVIEiGKVEGVMISEGGGDEALKADILNRWSLFSLVYKNF
>seq77
---DEVKIFEGAHTKSDLMALFLRRLSDlmreqqvgLKAIYFKYMPSVQSFVVMQSLGIDENSIQGLGGKLSPEEVKSFPDLFQQGTIPEQIQLLLEQGLKIQQFQSQVLWLNEILDGFFIFWSDQMRLETQFYSAQFLIFKMI----
>seq78
---------ELCGSVEEAIEACMFQISNwaesAALLFFKYIPGHPYLVLAQSKGIDSDKYRGVGVSMTGLAFSELGHFSEHGRFLSQLDSVMKGAFKAEKYTFSEVKSEQGILGIVVLLKSLESRAQKRfFEDCISVLNLTSQK-
>seq79
-FKEFISDLRAAQSKEELIATMLRETKETPLVYLRYLPTMASFLVTDTSYTPAENFKGLGCRLTPEENKDLAKQLELAIVPPSLAELL----------------------------------------------------
>seq80
-ISNLLTLYENTRNREDLIQVFFHSMEEycqegAKVLYFKYLEPVQLLVATHGSGIPVDQIKGAGIRLLPEEALEAKNLLISPRGFGSLNKLLLDIFHIEQCYVKTLFVREDIDGLFVFFGDSLEvFNTIKFSNRFSLFRVCFER-
>seq81
-----------------------------QGLYLKYLPTVHNLVATRALGLPIEKLKGVGAKLTPEEVQQLDLTVARHEVPPSVKALMVEGFHVPEFAPRGLLVHRGVDGLFVFWSKTSF-DIDLLDNEFMTFSQAYQM-
>seq82
---------------DHVVQMLLDAISQviqnKPVLFFKYLPQHSSLITSHASKIPVEKIKNLGINLSQVEATKIPEMLLHPLTIPGLPDLMKEVFQVPSYHAIPFVHQNQ---------------------------------
>seq83
---------------------------------FKYLPAYSSLVTSHASKIPIEQIKNLGVNLNQFQPLEIPEMLLRAQEMPGLIDLMKEVFKVSTFSALPFVYKNQTFGIAVIFDTLQNQSTKRLVESFL---------
>seq84
-----LKIYDGLISKSELTATFLRRMGEilraqklsLKSIYFKYMPSVQSFVVMQSLGLDEATVQGLGGKLSPEEIKSLKDLFQIKKIPEQIQLLMEQGLKVHVYQTQVLWTQDILDGFFVFWSDRVNLEPNFCQAQFLI--------
>seq85
--------YKTCETKEQVINLFFEQMEGLKVLFFKHLPTVHSLLVTHSSGFSHEDVQGIGCQLQDHEHKDLTSQLTLGVCPGPLALLL----------------------------------------------------
>seq86
-----IKEFRAVATQEELIQKLVVFLPPkSMMLYFKFLPSIQAFVATHCCGLPQEQIQGVGCRLSLEEQSTFSADVILGRLPLSLASLLEK--------------------------------------------------
>seq87
----------------EINQKFLAKNKKLTSVYFKYLPSVSSFVALQSLGIDIETLKGIGCRLTKEETEDSKTFFAQGGVPAELKVLLNEGLNAPQAIVKPIFVQEQLDGFF----------------------------
>seq88
----------------GVLQVFIEAVSDitdgKPVIFFKFLPAYSSLVANHAAKIPVEQIKNLGVNLSSFDTKSIPELLMQPVKLQPLLEMMKEVFQVQQFFALPFVYQNQPIGVIATFTPLSSDPVRRLLESFL---------
>seq89
--------------------------------------------------MPDEWVQGLSYKI-PNSDKDFNDRMMLGEFSDNFLNYIKRKWSVDIVKVLPLIIKNEIEGLLVTPQDI----TAEVAEDFSLVSLVYN--
>seq90
----------------------------------------------HASGFTSQQIQGGGCQLQGADLENLNTQLSVGLLPPIMMEMLTQIFHIKTARVLPLFVQNQLEGVVVYSAEAGKADTLRIGEEFSLFALCYSY-
>seq91
-VRQEIEKYRDSRSKEEVLNVFLREISQkflsrnkkISTIYFKYLPSVHSFVAVQALGIDIETLRGVGGKLTDEEAKDPKSFFLSGGIP-----------------------------------------------------------
>seq92
-LEERIKEYRSAAAKEDLLSKFMQIIAPIPCIYFKYLPTVKTLLATNASIFKPEQLQNVSCELRPEEAKDFANQI-----------------------------------------------------------------
>seq93
----------KSKNLDEVIQSFLDQTQNligKPLIFLTHMPSYLSFIASHAAGIEKAKIRNLGLNLKSIDSKQYLEKIADPQSLEQLKQLMTDIFKVPDYLALPMEEEGAITAYILVLGSVEDVSLRRLLDSFvNIFK------
>seq94
------------------------------------------------QNMPEVWVEGLSYKI-PANQTDFNENVLIGNYPESFLSYIKNKWGVDSVKVLPLIMKDEIEGLFISPQDITA----EMAED-----------
>seq95
----------GTKSLDETVQAFMDELSResqrFPVIYFKYLPAHASLAISQASHLAVDKFRGLGLDLKGLEPKALSSFFQSPEKSEKLEDLLKQAFRTESYTAFTHFNEGEALGLFVIFNRAKGEAHAILDSATH---------
>seq96
------------TEVDEVLKTFLSHTSKitgdAKVLYFKHLPAYTSLLLYMAEAIDVNDFKGVGVSLKDLYPVKYNEIILSPQKIESLKNMLSEVFKMNSALVIPLISESNVAGVIA-CEAIEDKSIRRVFDNFlQVLSLNYQN-
>seq97
---------------KEAIDLYLQEVSRflgeKQIVFFKYIPSHQSLVASQSVVADVNKVRGLGFELSKEEENFSLDQLHKPMTLEGLRRLMSSSLGIDEYFAQALVVQGVPEGVFVFVGSENIQAEPYIKSCVKSLG------
>seq98
-------------SKEEVLNIFFRELNQkflsrnlkISAIYFKYLPTVQSFVAMQTLGLDLESVRGVGGKLNAEESKDPDQFFAKGAIPQSIQILL----------------------------------------------------
>seq99
--------------VTQVVDIFLKQSAEiledASILFLKHLPSYRSLSVSQAHNVDPQKVRGAGISFKELSQQDYIQRISSIADAPELAEMLVQVFQKSEYVVFPLSIENQLAGAFVFLEEIHDMQSLRLLESF----------
>seq100
--QLEFKKYFQCKTKDDVLALFLLELQQkflarnkkLNAVYFKYLPSVTSFVALQALGIEIDSLKGVGCRLTDIESADPQAFFSQGGVPTELKELMSDGL------------------------------------------------
>seq101
---------------EAVIQTLVNSTSDliqkKPALYFKYFSAHATLVASHAAQIPGDHIKNLGLKLSTAEPKQLHAWLTQepGKIP-QLTELMKEVFTCKDYVAVPVLYKNDPQGTLVL--------------------------
>seq102
---------KKAKSRDELFRlyfKWLSQFSNLNLLYFKHIPSAHALMMQYCIGLDQQKLDGLGCELNPKDQL---------KDPQSLRELIRQALGQASVQVIPLVLQNEIQGYFVYWGQQQNISADIFELYFE--NLVY---
>seq103
---------RISETKdlDETIQIFMEELSResrqFPVLYFKYLPTHASLAISQASHLAVEKFRGIGLDLKRFDPKQAAEFFKAPSRDSELNDLIKQAFRANEFAAFTHFNDSETLGLFVVFNRPAT-AAQNI--------------
>seq104
------------------------QLAGlRPVIFLKYLPAHQSLTITASAQIPFNEIRTVGVKLNHYDEKNISFVLGEPEKFKELKELMTELFKCPNFTAVPLRVEGKVVGIFAIFQAMKD--FDElVNKIVYLGALSY---
>seq105
----------------------------KPVLFLRFLPAYSSLIAGAASKLPLDQIKGLGINLSSFDPKQISELLMHPEKIAPLSEFMREVFKVADFSAFPFVHENHPIGIAL---------------------------
>seq106
----------------------------------------------HSQNMNSIEVEGFTFQIELRQVKDYMSQFSLGVVPPQMIQSIEEKLKIKLNKALPLFVEDQLEGVFVTPTAI----PEDFAEEFSLFSLTYQL-
>seq107
-------------SSDEIHSILLKEISNefrsSGVLYFRYLPTHKSMVLTSWQNVDESNTKSIGLSFKKLTQEEIQNVLLSPHLCQDLTRLMAIVFKVKEFETAPMLgIDGAINGIVILTQKFESDRQSElLNSMVSVGSLVLQ--
>seq108
------------KDVSDTIQTFLEHTSQltegTKVLFLRYLPAYYSLLLTHAAQYPMEEGKKIGINLKDIDPKSVMDHLRTPSEMPLLQNLLKQVFSVEQYLAVPVETDDEFISIVIVCRDMSDPALRRVFDSFmQIFKVSY---
>seq109
---------------------------------------------------------GVGSQLTESETQSVAETLKKGTTPQSLRNLMAEAFAVTDFILKPLFVHKNLDGLFLFWSQTGV-AADEFTNEFLIFSLLYQN-
>seq110
------TQLSKAKNIDDVINIYLSHVASlvdqKPIIFLTHVSAYLSLVVTHSIQLNKDNLKNVGVSLKDLDSKTYVEQLMNPMSMAGIQGLMKDFFKLDKYLAVPIEDETNIGGIVLVFDAIEDVSTRRLFDSFT---------
>seq111
----------KAKGVGEVVQIFTEQTSEmlggVPVAYLTYFPSMIAYGVTNCALIDKEKIKSLGINLKDKPKEEYDKILQDPYSSSFLNQFVSQYFKTTAYLAIPLVDQGGLKGLFLVLKDITDAQQRRLLDSF----------
>seq112
------------------------------------------FVAMAAHGLDIESIKGVGSRMTKEEAKDVIAFLASGKVPQSLDELMSQGLKVPQYFSQAVPIHHGLDGLMIFWGDESF-HFKKIENDFYVFLMLFQQ-
>seq113
----------------------------------------------HSSGYLQESIQGVGCQLPENENQDLVSQLSLGVCPGSLAVLMKRALGLAQVQVFPVFSQNDLEGVIVADVVPDSEFHRHLSEQFALFGLAYSHF
>seq114
------TQMSKAKEVDQVIQVYLDQVSElvgkKPVIFLSHISAYLSLAVTHSAQVNKETLKNIGISLKDLDTKVYIETLMNPMSFSDLQSLMTDFFKTSDYFAIPIEEDNTVAGIVVVFDALKDVPTRRLFDSFT---------
>seq115
---------KLAQTKEldEAVRLFINAFSRlssdTPALYFKYLPNHLSLVFSQASLLPENKFRGIGIDLKKVSDLGPDRFFENPASAKILREFVGEVFKKDRFTVFTHRVDQEVVGLFLALEQFDLKARSDLSLLIEAFDLAYK--
>seq116
--QAQIEKIQAATILSRTVETYLQAAHSrtgKPVCFFKYLPARRSLFLTNASGLNLSDYRGLGIDFVKENPEFQASHLQTPHNLQALQELIRTAFKVEGFYALPVDDGNRVRAIVVFLQ------------------------
>seq117
-VQQEFKKYLECQNKDEVLALFLLELNQkflarnkkVCAVYFKYLPSVSSFVALQTLGIEIDSLKGVGCRLTAKENADPISFFAGGGIPQ----------------------------------------------------------
>seq118
-------------------------IKKQPVIYLKYFSAHATLVAADVANLPPEYIKNLGINLSDMDPKELQRVLmEEPAKIQQLKELMKEVFKIDKYVAFPFT-------------------------------------
>seq119
--------------IEVFLTHIEPQLSDeSGALYFKFNSPRRTLVASYGLGIDPTLIEGIGLNLEKVDPEFKRSDIKQIHERDSFNGLLHEVFGVSEYSIRYLTLNEEVIGVFVHWGLLEKTPRHDfVDRSFTLLSK-----
>seq120
---------------DTVIQTYLDHVSQlvnKPAIYLSHISAYLSLAVTHSSQINKDSLKNIGVSLKDLDSKVYIEKLMYPMNLEGLQALMKDFFQTAEYFALPVEEEAGIGGVVVVFDPLKDVVTRRLFDSFT---------
>seq121
--------------AQDVVQVFVDFASHalgdLPVVFFKHLPAYFTLSITHSAVVPMAQLRGIGLNLKSEISNDYGSLLKNPAELPSFRQLLFEVFGARDFEAYPIETEEGPSGVVVALQKLEDAATKR---------------
>seq122
---------------DEVVHVFIHTFSRltsdTPVLYFKFLPSHLSLVFAQASLLPEEKFRGVGIDLRKSHPGRPEEFFENPEEAKALREFVAQVFKKNRFTALTHRVDREAVGLWVALEKFDMK-------------------
>seq123
--------------LSCAAETYLQAVHSrieHPVCFFKYLPARQSLFLAHASGLDLNAYRGLGIDFSKGKAEFQASQLRDPSSIQEFSELIQTAFKVEKFCALPIDDGDGVKAIVVFLQSN----------------------
>seq124
---------------------------------------IQSFVVLQSIGLDAEAVQGLGGKLSPEELLNIKVLFQSQQIPGQIDLLLRSGLKVEKYLGQALFIDGQLDGFFVFWSDSSEIQESFYAAEFLVFKMVYQN-
>seq125
----------------------TRSLNDTPVLYFKYFPGHMTLLFSRATLLSNDTFRGLGIDLKKEGSRTLEEHFEHPATLPGMRKMVREVFKCELFSALTHMDGNEVRGVFIILDEVEV--------------------
>seq126
-----------------FLANLHQNLSTSPVLYFRFVNSPLSLVITHASGIQVDKVRGVGFHL--QDTQSIREKLLQPEALTGFYDFLKQAFRLQE--------------------------------------------
>seq127
-----------AATKdlDETLDIFLEALAAetkAPTLYLKYLPTHASLLIAQVAQLPIEKYRGLGLDFRKEGIQNASELLRDPSEVKPLAELIRSLFQKESFVAIPHLNEGEALGVFVVLGDLDVSNSA----------------
>seq128
-------------TLDDAVQVFMNSASNalggCSAVFFKYIANRRVLIAGRGEKLQDHDLRGLGMDL-NRSVDGFRaVQLREPMHLMAFVQMVKDVFGIKDFFAWPVRALGEIQGLICFLKN-----------------------
>seq129
----------AAHSIDQAVEVYLKAMHQslaQPVCLFKYLPARRSLFLTQVMGLPPEPYRGLGVDFSKQEENFSSQVLRQPEGLSSLRDLVNSAFKTSTFAALPVDDGEGIKSVVVVLNRS----------------------
>seq130
-------------------EAYLQAVHSrtgKPVCFFKYLPARQSLFLVHASGLDLNAYRGLGIDFSKEGSEFQGSHLLDPQSIQKFRELIRKVFKVERFCSFPVDDEDGVKAVVAFL-------------------------
>seq131
----------------------------------------------------------------PEEMEDLTSQLQAGHLPSALQELMTQGLKIAKFNFKPVLVQRALDGLMIFWGEASA-HQALIENNFVIFQMLYQQ-
>seq132
---------------DHIIQAMVDEISHtsqnAPALFLRFLPAYQSLTVTASAQIPLESIKTVGIKLDRYDPRNIASVLKDPLKFLELKELMSQVFKCPEFYALPFVIGGNPIGLMISFGTSE---------------------
>seq133
------KELEMCQNKEDVVNLFLREAVRylgtKEGLYFRYLDLHRTLALAQTFGIERNDIDGVGIDLAALEPGFLPSQLSKPHLLFSLQEFVKKGLLRSGAVILPLNFRHQTIGVFIF--------------------------
>seq134
----------------------------APAVFLKYLPNRRALVTSAAHRLPAEAWKGLGLNL-SEEPDFRIGDLRHPEKLAGLGEMVQSLARAKEYWVRPMIIRDQVHGLFLVMGPSGDLPVQQMESTISVM-------
>seq135
-------------------EEISRASQKAPALFLRFMPAYQSLAVTASAQIPLESIRTVGIKLDRYDPRNVSSVLRDPLKFPELQELMGQVFKCSEFYALPFIVGGNPIGLVISFGKSEN-LLKIFSSLVIISELAYN--
>seq136
--------------------------GQAPAVFLRYLPNRRCLVTTAAHRIPAEAWKGLGLNLSEEPDFRI-GDLRHPEKLPGLREMAQSLGQTNELWVRPLVIREEVHGLFVVLA------------------------
>seq137
---------------SDAVALFIDEANVlieNPIVFFRHAPTYSSIVYSQSAGLERGDYKNVGLNFRELNDRVYPNNLKNPDNIEPLRELLAKIFQVTKYETHPLIIDGDVAGFFVCFTEINEPPMLRlFHAAFEILQMK----
>seq138
----QVEQIQASMTLPNAVEAYLQAVHVrtgKPVCFFKYLPNRRSLFLAYASGLDLSTYRGLGIDFNKEKFEFQKGLLRDPQNIQVFKDMIQTAFKVERFCTFPVEDANGVISIIVFLKA-----------------------
>seq139
-------------------------VNSAQAVYFKYNAVRKAIVASYGLGVDPALIEGIGLNLERVDPSFQRSEIRVLHERESFQNLLRDVFKVKTFHCHYLSVGGEVIGVFVHWGLSENSE------------------
>seq140
----------HAKDVHEIIQRFMNSANTafsqAPVIFFRHLQTQDQFVLSQLVGINEPTLKGVQLGLSSEQI---------EQDPDALKDSIMALFEKDGFEFRFLKEDQQIIGLFVILKSFNDPYERKcLSHLFDIFDLSYQQ-
>seq141
---------LAASSIDRAVEVFLKAMHQtlsQPICLFKYLPARRSLFLTQVVGLPAEPYRGLGVDFSKQDENFSSQSLRQPETLPRLRELMQSAFKAST--------------------------------------------
>seq142
---------KMAQAKDtvEVIDFFLNQVSEltgKPCIYLKHFSSFSSLVVSNSSILEIEKLKKVGLSFKEEDPKTYHAQVLDPSKMTKLNALMAQAFKVEK--------------------------------------------
>seq143
-------------------------LASCGMVYFKYIANRRVLMATQAHKLDIEW-TGLGVNFNETGDTFRTQHLREPGQIVEVQNMMREAFHAGEFFAFPVETLGEIQGVVAFLRPEPDAATMKmIQD------------
>seq144
-FSEEFMRYQTAKTLDSLLAIFFERVQKisrknYSGLFLKAIPSIDSLAVFGGTAFDWQKWKGYGVKVQ-----SFQQNPRRPLVISEVVEFMREVVGVEQFVSFPFAFENHLEGVFLFWCHQDKPQSRDLE-------------
>seq145
--------------------------------FFKYIANRRVLMAGQAHKLDDFDLNGLGINFNEANSDFRSSQLRDPHGLNELQAIMAEVFNASEFVALPVEALGEIQGIVILLRNDPDPAGQQkLQEWVFLLS------
>seq146
---------------DGVLQTLLGSLSQlidgKPALYFKFLPAYNSLVVGLSEKIPIDGLRTVGISLEKYNLKEVPTLLTTPEKIPQLNDLMGEVFK-----------------------------------------------
>seq147
----------KAQSLDELGINLVWSLKNivssrRKGIYFKYLPTYCSLVALGGFNFEDKNISGVGLNFASSKDFNAAQHLHKLMYVPAFLKVVERLFAHTNVTVTTLSCENEVRGVLVCEKPPSR--------------------
>seq148
---------------DDAIQLFLREAIRylqtKEALYFRYREVHHTLVLTKSVGIDLADVDGVGIDLAEHEPGFVPALLQKPHMLFSLQEFVKKGLLRQHAVILPHIFRGQVLGVFVLPCDKGLARVHD---------------
>seq149
----------RSTTRDDSIDLFLREATRylqiKEGLFFRYREIYQTLVLTHANGLDPKKFGDVGIDLAAHEPGFVPALLKKPHLLYSVQEFVKKGLSRQSAVVLPFFYRDQIYGVFVLP-------------------------
>seq150
-------KISVITNINDVVDCFVQSLFElinRPAIYLKYVPSHTSLIVTHVAGLDINKFKNAGILF-KEEPQSYLEKIKHPQKFQQLQEFMLHIFQVQSYFPIP---------------------------------------
>seq151
---------------DETIQIFLETLSRewndVPVLYFKYLPSHASLPLALGAGQKIEKFRGFGVDLRKESPDQIVEFFRAPESSEILKRFMKEVFASDNFTPFTHTTEGEALGLFVALTKTP---------------------
>seq152
-----------AEAVQLVVNELHRHMPFSQVVFLKHIRGRSTLVAESSSGIEMQALRSVGVDLKQTEPSFKEALLLRPEKLVGVTDLVRSGFDNRHFAAFPVVVQKEVWGI-----------------------------
>seq153
----------------------------RKGIYFKYLPTYCSLVALGGFNFDEHSVSGVGLNFSSSKDFNAAQHLHKLIYVPAFLKIVERLFEHTNVNVKTLVCDGEVRGVLVYDKSPSM------S-------------
>seq154
----------------------------------------------SGHHVSASSLKGVGFQL-EGTIENLIAQVQNGEIPKQLQTLMREGFQAGQYLCLPVWVSGDLDALLVVWSPSEVLRAEIFANHEALFSILYE--
>seq155
-------------SLTDIVTHLCSEIHKensCDVVYFKYIDSQGTLVAAHSEGLAFETIRGIGIDFISSGKKFFRDQLHLPASLMEMKELVQQVFNEKEF-------------------------------------------
>seq156
----------------------------------------------------------VGAKLTVDEYRALENLLRARTLPESLRKVIQEAFRIEKFTTFPLFLKNQVEGALVFWGLDLN---EGDWGEFLIFQLCYQN-
>seq157
----------NCDSESEAVKHCLGEIARslgrGQIVFFRFIRGRATLVAEAANGISAEAISNIGVELKKTEPKFNEKLLQRPERLLGLLDLVRNGFMQRQFAAFTVEIDRVPEGVILIL-------------------------
>seq158
--------------------------------------------------LDMEELKGVGCRLVESEANDPVSFFAQGGIPAELVGLAREGLNANEPIFRPIFVLGELDGFMIFWSRQNEIYPEELDNDLSLFQLMYER-
>seq159
---------------QNAIELYMKEVSRylkgAAVIYFKYIPGYESLVVTQSVGHDLNELSGVGLNLLEEEKNFDQEKLKFPQQLNSIRRLMKE--------------------------------------------------
>seq160
-------------------------------LFFKHLPAYFTLSVTHSAVAPMAQLRGVGLNLKDELNADYLQLLKSPSEMAGLKTLLFELFGAREFIGYPIETDEGISGLVVALHGLEDPASRRLFEAFTrLFEMQY---
>seq161
---------------------------KRKGVFFKYLPTYCSLVAMGSFFFDSpKKLNGVGLNFSKSVKFKPSEHLQHGLKVPAFKKLCEKIFGHKNLNIRVLSVDHEVQGILVYEKPPANSL------------------
>seq162
------------SSVDECVQVFLASAAQalgsCPAVFFRYIANRRVLLAAYGEQMEAVDLSGLGLDLNETAPGFRTVQLREPMRIIPFVEMVKVVFAVPEFFAWPIHALNEIQGLACF--------------------------
>seq163
---------------EDIVNAFLTYLSElvdgKICLFLKFYPAKSALVVRNIKGHDLEKIyteqdisdfKNIGMSLGPASEKDIVSIVARIARHPSLKTLVTKLFNTSKYMAYPLIIRDTPIGVTIVVDEMTLSERDDkiLKQYLNQLEISY---
>seq164
----------------------------RKGLYFKYLPTYCSLVALDGFNFSNKKFNGVGLNFSSSKDFDANQHLNKLKQVPGFIKVIERVFGHQNINLKTLECDGEAKGVLVYEQAPR---------------------
>seq165
---------------SDTIQVFLEHTSQlaenSKVLFLRYLPSYYSLLLSHAASYQMEEGKKIGLNLKEIDPKKIMDILRLPQEMDLLKNLLTG--------------------------------------------------
>seq166
---RLMQAFSKAKDIDAVIQIYLEHTSQiignKPIVFFTHLSSYLSLLVSHVVGYEKEALRNVGVNLKSVESKEYMNLL-----------------------------------------------------------------
>seq167
-------------------VMMSEQTTKAPAVFLRHLPNRRCLVTRAAHRLPAEAWKSLGLHL-NEEPDFCLSDLRHPEKLSGLKEMGQTLVGHDEIWVRPLILRDEVYGLFVVFSALIDLPMNRLESIVK---------
>seq168
---------------------------KilpekRKGVFFKYLPTYCSLVAMGSFFFENpKKINGLGLNFSKSVKFKPRQHLQYGLKVPAFTKLCEKLFGHKKLNIRVLSADQDVKGILVYEKPPANSL------------------
>seq169
---------KKAKSLDELGINLVSSLNKivlpgRKGVYFKYLPTYCSLVALGGFNFESKKVTGVGLNFSTSKDFDASKHLQQLMYVPAFLKVVERLFEHTDVTVRTFDCDRESKGVVVYED------------------------
>seq170
---------------------------TqgRKGIYFKYLPTYCSLVALGGFNFKNTKVNGVGLNFSSSKDFNASQHLQKLLHVPAFIKVVERLFEHTEVVVRTFECDGETKGVAVYEKPPGG------SSDIEVLSLC----
>seq171
---------------DQCVQLFMESVSRvfsdVPILYFRYVASHMSLLVSQAVWLPIEKIRGIGVDLKNEDPARLPECFRDPSRLEPLKTLVQQVFR-----------------------------------------------
>seq172
-------------------------------IFFKYLPAHLSLVTSHASKIPLEQIKNLGINLSQLSQVDATKISEMLLQPSSlpgLPDLMREVFQLQAYEAIPFVHQN----------------------------------
>seq173
--------------------------------------------------INKETLKNIGVSIKDLDQKDYVEKLANPMELVGLKTLMKDFFQTTEYFAVPVEEDSAIAGIIVVFDPMKDVSVRRLFDSF----------
>seq174
--------------ADDCIQIFLQSCSNmlgsCGVIYMKYIANRRVLMTTLAHRIDAEW-NGIGVNFNETTGDSFR--TAHLREPNNipeVKQMIHEVFHTEEFFAHP---------------------------------------
>seq175
---EAILNLKKAQSLDELgvnLSWSLNHIVIdeRKGMYFKYLPTYCSLVSIGGFNLKDKKTNGVGLNF--SSSKDFNSLMHLREVLNVpaFENIVKKFFEHTDVQTRLFECDGGVKSLLVYERAPGGS-------------------
>seq176
-----------RNSVNECVQDFLDFGSKllgdCGAIYLKCLPLRK--VLSATHGVALENWKGVGVNLADESHFTWGALQEPQNVP-AIREMVREIFNRSDFQAFTFKVAKEVNGIALFFS------------------------
>seq177
-------------SIDDAMNVFLKNVSSvlgsPPVLYMKYIANRRVLMASQSQNLESFDLNGLGVNFNELNANFRASQLHDPQAIPEVGSLVKEVF------------------------------------------------
>seq178
------------NSADECLRVFLESASHslggCAAVYFRYIPNRRVLLAGHAINTHGIELKGLGINFNEVAPGFRTAQLRDPMGIPEFSEMIREIFGVSEFMAWPVE-------------------------------------
>seq179
--------LKKCETKKDLIKCFLVEFSRyykgAPVVFYKYVRSYKSLVSSMHLGVKSFKS-GEIIKLTAKEDGQLNNDKLDSIYLESLENI---AFDDEeGVYFLPLSILGELKGLFVFSRQEA---------------------
>seq180
--------------------------SKRKGIFLKYLPTYCSLVATSSFFFDSpRKLNGLGLNFSKAIKFNPKEHLQQGLKIPALAKLCYKTFGHRMLNSRVLTSDKDIQGILVYEKP-----------------------
>seq181
---------------DEIIETFLSYTSEllggATCVFLKYYPQKTALVARHVQcrsehcpytKEQLEGIKNVGMSLGVAGEKDIVSVISKITNHPSLKTLVYKLFNTSKYIAFPLIIRDTPLGVTLIVdvNSLGSKEDKIVQQ------------
>seq182
---------------DEIIETFLSYTSSllggSTCVFLKYFPQKTALVVRHIQckgqcsfsKEQMEALKNVGMSLGVAGEKDIVSIVSKISNHPSLKTLVNKLFNTTKYLAYPLIIRDTPLGVTlIVDQNsLDPKEEKIVQQYLN---------
>seq183
-----------CTSFTDSIDCFMSEISRylknTSVMYLKYVPAYRSLVTSRSVHLPDF-TSGESFDL-KTLFKENNLNETNFEKSNEFRDKIYDYTYWDNFSIIALRINGDVKGLFIIKTDFVNE-------------------
>seq184
------------------------------------------LVLTQSTGVDSNEFDGVGIDLAEHEPGFVPALLQRPHLLFSLQEFVKKGLGRQNAVILPLIFRGHVLGIFVLPEDMDKAMRH----------------
>seq185
-------------------------FNCQSLVYFRYLKSYSSLLVTHSEGLKFSDLRGKGISFSTNQNFVPERDLKRIDSNPLFYELVRKLIPNQAYTSFLFEACGEPKGVFVLANA-----------------------
>seq186
-----------CHSVEDAIQDWLNKINKlyqdTPTVFFRYIPNHSHLVLSQCAGLDLQKVRGIGVPLAGLSLKEQKYFYSHVRFLANLRDLVKGAFNVSEFEFRELVTDKAVLGLCVIFKNLKNESEKRfFNDSIELLNLVA---
>seq187
------------------------------WIawYFKFMPEVQAFVVTQYRRFNDKQTFPFsSFKPQTVSVTDLFALLHAGDRNVELFSFVAKHFDITNLQVFPISYSNLVDGIFCFIGQSEEK-------------------
>seq188
----------------------VRYYKTDLALYFRYNPGAGTLVVMRASGLPLEHFQAVGIHLRKNE-PGFTEEIL--HQPNRiriLREFVIAGMNRLDFVAFPHVENKIVRGLFVVPAKKNQ--------------------
>seq189
-------------------------------------------LVSHTACLPIDKFRGIGVQLHSQSAIDLAGQLNEPMRIDGLRQLVAEVFRRDNFFAFTHSSEGEILGVCIV--------------------------
>seq190
--------------VHETIQVFLDHVSQlfddSRVVFLRYLPAYYSLMVSHTAKIavqqpptaNPEEARKVGINLKDIDPKTVLDQLKNPQTFVPLSELMSE--------------------------------------------------
>seq191
------------TTRPQMIEKWMREALRvhctNEILYMSYLGTKKTLIVTQSLGFAADELDDVGLDLAKEEPGFEESMLQRPEKLWALHQFVTKGLQRREALYYSLMHHGQILGVFILPAKEGE--------------------
>seq192
-----------------IIDTFLSYMSElingKTCIFLKYYPAKTALIIKHMAGKNigtlysAEQIEGfknVGMSLGVAGEKDIVSIVSRIANHPSLKTLVSKLFNTTKYLAYPLIIRDTPIGVtlLVDIDSVNEQDEKIIKQYLNQFEISYD--
>seq193
-----------------------------------------------------------------EESKDPEKLFSEGKAPESILKLTKEGLGAEEPVMKFVSIRGSLDGIMAVWGQGSPVFWDQFDHEFSLFNLLYE--

Двоичные данные
tests/graphmodel_test/T1001.npz Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -0,0 +1,44 @@
import os
import numpy as np
import string
from typing import Iterable
def parse_a3m(filename):
    """Read the sequence lines of an A3M alignment file.

    Lines whose first character is ``>`` are treated as labels and skipped.
    Every other line has its trailing whitespace stripped and all ASCII
    lowercase letters deleted before being collected.

    Args:
        filename: Path of the alignment file to read.

    Returns:
        A list of strings, one per non-label line, in file order.
    """
    # Translation table that maps every ASCII lowercase letter to None,
    # i.e. deletes it in a single pass of str.translate.
    table = str.maketrans(dict.fromkeys(string.ascii_lowercase))
    seqs = []
    # The context manager guarantees the handle is closed, even on error
    # (the previous bare open() in the for-statement leaked it).
    with open(filename, "r") as handle:
        for line in handle:
            # skip labels
            if not line.startswith('>'):
                # remove lowercase letters and right whitespaces
                seqs.append(line.rstrip().translate(table))
    return seqs
def logits2value(logits, bins):
    """Convert class logits into the value of the highest-scoring bin.

    The winning class is taken with ``argmax`` along axis 2 of ``logits``
    and used as an index into ``bins``.

    Args:
        logits: Array-like with at least three dimensions; axis 2 holds the
            per-class scores.
        bins: Sequence of bin values indexed by class. Entries that are
            ``None`` become NaN in the result.

    Returns:
        A float array with the same shape as the argmax result (``logits``
        with axis 2 removed) holding the selected bin value per entry.
    """
    preds = np.argmax(logits, axis=2)
    # Build the lookup table once; dtype=float turns None entries into NaN,
    # matching what assigning None into a float array does.
    lookup = np.asarray(bins, dtype=float)
    # Integer-array indexing handles every row/column combination, so
    # non-square maps are decoded fully (the old nested loop reused
    # len(preds) for both axes and only worked for square input).
    return lookup[preds]
def loadT1001(preprocess=True):
    """Load the T1001 sample arrays and the first sequence of its alignment.

    Reads ``T1001.npz`` and ``T1001.a3m`` from the current working directory.

    Args:
        preprocess: When false, return the arrays exactly as stored in the
            archive. When true (default), convert each array with
            ``logits2value`` into per-entry bin values.

    Returns:
        A tuple ``(dist, omega, theta, phi, seq)`` where ``seq`` is the first
        sequence returned by ``parse_a3m``.
    """
    # Use the archive as a context manager so its file handle is released;
    # the arrays are materialised while the archive is still open.
    with np.load('T1001.npz') as sample:
        sample_dist = sample['dist']
        sample_omega = sample['omega']
        sample_theta = sample['theta']
        sample_phi = sample['phi']
    seq = parse_a3m('T1001.a3m')[0]
    if not preprocess:
        return sample_dist, sample_omega, sample_theta, sample_phi, seq

    # Class 0 of every feature maps to None (NaN after decoding).
    # NOTE(review): presumably class 0 is the "no contact" bin - confirm
    # against the model that produced these logits, together with the
    # number of bins used for each feature below.
    dist = logits2value(sample_dist, [None] + list(np.linspace(2, 20, 37)))
    omega = logits2value(sample_omega, [None] + list(np.linspace(-180, 180, 24)))
    theta = logits2value(sample_theta, [None] + list(np.linspace(-180, 180, 24)))
    # Decode phi from its own logits (this previously reused sample_theta).
    phi = logits2value(sample_phi, [None] + list(np.linspace(0, 180, 24)))
    return dist, omega, theta, phi, seq

Просмотреть файл

@ -0,0 +1,46 @@
from T1001_loader import *
import json, time, os, sys, glob
import torch
import torch.nn as nn
sys.path.insert(0, '../..')
from sequence_models.graphmodel_utils import *
from sequence_models.utils import Tokenizer
# Smoke test: build graph features for the T1001 sample and run one forward
# pass of the structure-to-sequence decoder without gradients.

# Fixed sizes used for the reshapes and the decoder configuration below.
N_RES = 140           # NOTE(review): presumably the T1001 sequence length - confirm
K_NEIGHBORS = 10      # neighbours requested from get_k_neighbors
N_NODE_FEATURES = 10  # last dimension of V after reshaping
N_EDGE_FEATURES = 6   # last dimension of E after reshaping

# load features
dist, omega, theta, phi, seq = loadT1001()
dist = torch.from_numpy(dist)
omega = torch.from_numpy(omega)
theta = torch.from_numpy(theta)
phi = torch.from_numpy(phi)

# process features
V = get_node_features(omega, theta, phi)
E_idx = get_k_neighbors(dist, K_NEIGHBORS)
E = get_edge_features(dist, omega, theta, phi, E_idx)
# The mask is computed before NaNs are replaced in E.
mask = get_mask(E)
E = replace_nan(E)
# NOTE(review): `tokenizer` is not defined in this script; only the Tokenizer
# class is imported. Unless one of the star imports provides it, this line
# raises NameError - confirm and instantiate a Tokenizer if needed.
S = get_S_enc(seq, tokenizer)

# reshape: add a leading batch dimension of size 1
V = V.view(1, N_RES, N_NODE_FEATURES).float()
E = E.view(1, N_RES, K_NEIGHBORS, N_EDGE_FEATURES).float()
E_idx = E_idx.view(1, N_RES, K_NEIGHBORS)
mask = mask.view(1, N_RES)
S = S.view(1, N_RES).long()
# One length entry per batch element (the earlier `L = len(seq)` assignment
# was immediately overwritten and has been dropped).
L = [N_RES]

# NOTE(review): k_neighbors=30 differs from the 10 neighbours gathered above;
# verify which value the decoder actually expects.
decoder = Struct2Seq_decoder(num_letters=20,
                             node_features=N_NODE_FEATURES,
                             edge_features=N_EDGE_FEATURES,
                             hidden_dim=128,
                             k_neighbors=30,
                             protein_features='full',
                             dropout=0.10,
                             use_mpnn=False)

with torch.no_grad():
    decoder.eval()
    output = decoder(V, E, E_idx, S, L, mask)