init files for graph model decoder
This commit is contained in:
Родитель
87d32ae6b2
Коммит
d92e105f88
|
@ -0,0 +1,428 @@
|
|||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# The following gather functions select node or edge features at the given neighbor indices.
|
||||
def gather_edges(edges, neighbor_idx):
    """Pick, for every node, the edge features of its listed neighbors.

    Features [B,N,N,C] at Neighbor indices [B,N,K] => Neighbor features [B,N,K,C]
    """
    n_channels = edges.size(-1)
    # Repeat each neighbor index across the channel axis so it can drive gather.
    index = neighbor_idx.unsqueeze(-1).expand(-1, -1, -1, n_channels)
    return torch.gather(edges, 2, index)
|
||||
|
||||
def gather_nodes(nodes, neighbor_idx):
    """Collect the node features of every node's neighbors.

    Features [B,N,C] at Neighbor indices [B,N,K] => [B,N,K,C]
    """
    n_batch = neighbor_idx.shape[0]
    n_channels = nodes.size(2)

    # Flatten the (N, K) index grid per batch element, then repeat it over
    # channels: [B,N,K] => [B,NK] => [B,NK,C]
    flat_idx = neighbor_idx.view((n_batch, -1))
    flat_idx = flat_idx.unsqueeze(-1).expand(-1, -1, n_channels)

    # Gather along the node axis and restore the [B,N,K,...] layout.
    gathered = torch.gather(nodes, 1, flat_idx)
    out_shape = list(neighbor_idx.shape)[:3] + [-1]
    return gathered.view(out_shape)
|
||||
|
||||
def gather_nodes_t(nodes, neighbor_idx):
    """Collect neighbor node features for a single position.

    Features [B,N,C] at Neighbor index [B,K] => Neighbor features [B,K,C]
    """
    n_channels = nodes.size(2)
    # Repeat each index across the channel axis so it can drive gather.
    index = neighbor_idx.unsqueeze(-1).expand(-1, -1, n_channels)
    return torch.gather(nodes, 1, index)
|
||||
|
||||
def cat_neighbors_nodes(h_nodes, h_neighbors, E_idx):
    """Append gathered neighbor-node features to per-neighbor features.

    The node features are gathered at ``E_idx`` with ``gather_nodes`` and
    concatenated AFTER ``h_neighbors`` along the last (channel) axis.
    """
    gathered_nodes = gather_nodes(h_nodes, E_idx)
    return torch.cat([h_neighbors, gathered_nodes], -1)
|
||||
|
||||
|
||||
class Normalize(nn.Module):
    """Layer-norm style normalization with a learned gain and bias.

    Args:
        features: Size of the normalized axis (length of gain/bias).
        epsilon: Stabilizer added both inside the square root and to the
            resulting standard deviation.
    """

    def __init__(self, features, epsilon=1e-6):
        super(Normalize, self).__init__()
        self.gain = nn.Parameter(torch.ones(features))
        self.bias = nn.Parameter(torch.zeros(features))
        self.epsilon = epsilon

    def forward(self, x, dim=-1):
        """Normalize ``x`` along ``dim`` and apply the affine parameters."""
        mean = x.mean(dim, keepdim=True)
        std = torch.sqrt(x.var(dim, keepdim=True) + self.epsilon)

        scale, shift = self.gain, self.bias
        if dim != -1:
            # Gain/bias are 1-D; give them a singleton shape everywhere except
            # along `dim` so they broadcast against x.
            broadcast_shape = [1] * len(mean.size())
            broadcast_shape[dim] = self.gain.size()[0]
            scale = scale.view(broadcast_shape)
            shift = shift.view(broadcast_shape)

        return scale * (x - mean) / (std + self.epsilon) + shift
|
||||
|
||||
|
||||
class TransformerLayer(nn.Module):
    """Graph transformer block: neighbor attention followed by a feed-forward net.

    Each sub-layer is wrapped as ``norm(h + dropout(sublayer(h)))``.

    Args:
        num_hidden: Width of the node embeddings.
        num_in: Width of the per-neighbor features fed to the attention.
        num_heads: Number of attention heads.
        dropout: Dropout probability applied to each sub-layer output.
    """

    def __init__(self, num_hidden, num_in, num_heads=4, dropout=0.1):
        super(TransformerLayer, self).__init__()
        self.num_heads = num_heads
        self.num_hidden = num_hidden
        self.num_in = num_in
        self.dropout = nn.Dropout(dropout)
        self.norm = nn.ModuleList([Normalize(num_hidden) for _ in range(2)])

        self.attention = NeighborAttention(num_hidden, num_in, num_heads)
        self.dense = PositionWiseFeedForward(num_hidden, num_hidden * 4)

    def forward(self, h_V, h_E, mask_V=None, mask_attend=None):
        """ Parallel computation of full transformer layer

        Args:
            h_V: Node features.
            h_E: Per-neighbor features passed to the attention.
            mask_V: Optional node mask; multiplied into the output after a
                trailing axis is added.
            mask_attend: Optional attention mask forwarded to the attention.
        Returns:
            Updated node features.
        """
        # Self-attention
        dh = self.attention(h_V, h_E, mask_attend)
        h_V = self.norm[0](h_V + self.dropout(dh))

        # Position-wise feedforward
        dh = self.dense(h_V)
        h_V = self.norm[1](h_V + self.dropout(dh))

        if mask_V is not None:
            mask_V = mask_V.unsqueeze(-1)
            h_V = mask_V * h_V
        return h_V

    def step(self, t, h_V, h_E, mask_V=None, mask_attend=None, E_idx=None):
        """ Sequential computation of step t of a transformer layer

        Args:
            t: Index of the position to update.
            h_V: Node features.
            h_E: Per-neighbor edge features.
            mask_V: Optional node mask, indexed at ``t``.
            mask_attend: Optional attention mask.
            E_idx: Neighbor indices required by ``NeighborAttention.step``.
        Returns:
            Updated features for position ``t``.
        Raises:
            ValueError: If ``E_idx`` is not supplied.
        """
        if E_idx is None:
            # NeighborAttention.step needs the neighbor indices; previously the
            # attention mask was passed in their place by positional accident.
            raise ValueError(
                "TransformerLayer.step requires E_idx (neighbor indices)"
            )

        # Self-attention
        h_V_t = h_V[:, t, :]
        dh_t = self.attention.step(t, h_V, h_E, E_idx, mask_attend=mask_attend)
        h_V_t = self.norm[0](h_V_t + self.dropout(dh_t))

        # Position-wise feedforward
        dh_t = self.dense(h_V_t)
        h_V_t = self.norm[1](h_V_t + self.dropout(dh_t))

        if mask_V is not None:
            mask_V_t = mask_V[:, t].unsqueeze(-1)
            h_V_t = mask_V_t * h_V_t
        return h_V_t
|
||||
|
||||
|
||||
class MPNNLayer(nn.Module):
    """Message-passing block: neighbor messages followed by a feed-forward net.

    Messages are produced by a three-layer MLP over the concatenation of each
    node's features with its per-neighbor features, summed over neighbors and
    divided by ``scale``.

    Args:
        num_hidden: Width of the node embeddings.
        num_in: Width of the per-neighbor features.
        dropout: Dropout probability applied to each sub-layer output.
        num_heads: Not used by this layer (presumably kept for signature
            parity with TransformerLayer).
        scale: Divisor applied to the summed messages.
    """

    def __init__(self, num_hidden, num_in, dropout=0.1, num_heads=None, scale=30):
        super(MPNNLayer, self).__init__()
        self.num_hidden = num_hidden
        self.num_in = num_in
        self.scale = scale
        self.dropout = nn.Dropout(dropout)
        self.norm = nn.ModuleList([Normalize(num_hidden) for _ in range(2)])

        # Message MLP: (node ++ neighbor features) -> hidden -> hidden -> hidden
        self.W1 = nn.Linear(num_hidden + num_in, num_hidden, bias=True)
        self.W2 = nn.Linear(num_hidden, num_hidden, bias=True)
        self.W3 = nn.Linear(num_hidden, num_hidden, bias=True)

        self.dense = PositionWiseFeedForward(num_hidden, num_hidden * 4)

    def forward(self, h_V, h_E, mask_V=None, mask_attend=None):
        """ Parallel computation of a full message-passing layer """
        # Tile every node embedding over its neighbor axis and prepend it to
        # the per-neighbor features (h_V_i ++ h_E_ij).
        n_neighbors = h_E.size(-2)
        h_V_tiled = h_V.unsqueeze(-2).expand(-1, -1, n_neighbors, -1)
        h_EV = torch.cat([h_V_tiled, h_E], -1)

        # Message MLP
        hidden = F.relu(self.W1(h_EV))
        hidden = F.relu(self.W2(hidden))
        h_message = self.W3(hidden)
        if mask_attend is not None:
            h_message = mask_attend.unsqueeze(-1) * h_message

        # Aggregate over neighbors
        dh = torch.sum(h_message, -2) / self.scale
        h_V = self.norm[0](h_V + self.dropout(dh))

        # Position-wise feedforward
        dh = self.dense(h_V)
        h_V = self.norm[1](h_V + self.dropout(dh))

        if mask_V is not None:
            h_V = mask_V.unsqueeze(-1) * h_V
        return h_V
|
||||
|
||||
|
||||
class PositionWiseFeedForward(nn.Module):
    """Two-layer MLP (Linear -> ReLU -> Linear) applied to the last axis.

    Args:
        num_hidden: Input and output width.
        num_ff: Width of the intermediate layer.
    """

    def __init__(self, num_hidden, num_ff):
        super(PositionWiseFeedForward, self).__init__()
        self.W_in = nn.Linear(num_hidden, num_ff, bias=True)
        self.W_out = nn.Linear(num_ff, num_hidden, bias=True)

    def forward(self, h_V):
        """Expand to ``num_ff``, apply ReLU, project back to ``num_hidden``."""
        expanded = self.W_in(h_V)
        activated = F.relu(expanded)
        return self.W_out(activated)
|
||||
|
||||
|
||||
class NeighborAttention(nn.Module):
    """Multi-head attention of every node over its K graph neighbors.

    Queries come from the node features; keys and values come from the
    per-neighbor features. ``num_hidden`` is split evenly across heads.

    Args:
        num_hidden: Width of node features and of the attention output.
        num_in: Width of the per-neighbor features (input of W_K / W_V).
        num_heads: Number of attention heads.
    """

    def __init__(self, num_hidden, num_in, num_heads=4):
        super(NeighborAttention, self).__init__()
        self.num_heads = num_heads
        self.num_hidden = num_hidden

        # Self-attention layers: {queries, keys, values, output}
        self.W_Q = nn.Linear(num_hidden, num_hidden, bias=False)
        self.W_K = nn.Linear(num_in, num_hidden, bias=False)
        self.W_V = nn.Linear(num_in, num_hidden, bias=False)
        self.W_O = nn.Linear(num_hidden, num_hidden, bias=False)

    def _masked_softmax(self, attend_logits, mask_attend, dim=-1):
        """ Numerically stable masked softmax

        Entries whose mask is not > 0 get the most negative finite value of
        the logits' own dtype before the softmax, and are zeroed afterwards,
        so a fully masked row yields all zeros.
        """
        # Use the logits' dtype/device instead of a CPU float32 constant so
        # the fill value is valid for half/double tensors and on any device.
        fill_value = torch.finfo(attend_logits.dtype).min
        masked_out = ~(mask_attend > 0)
        attend_logits = attend_logits.masked_fill(masked_out, fill_value)
        attend = F.softmax(attend_logits, dim)
        attend = mask_attend * attend
        return attend

    def forward(self, h_V, h_E, mask_attend=None):
        """ Self-attention, graph-structured O(Nk)
        Args:
            h_V:            Node features           [N_batch, N_nodes, N_hidden]
            h_E:            Neighbor features       [N_batch, N_nodes, K, N_in]
            mask_attend:    Mask for attention      [N_batch, N_nodes, K]
        Returns:
            h_V:            Node update             [N_batch, N_nodes, N_hidden]
        """
        # Queries, Keys, Values
        n_batch, n_nodes, n_neighbors = h_E.shape[:3]
        n_heads = self.num_heads

        d = self.num_hidden // n_heads
        Q = self.W_Q(h_V).view([n_batch, n_nodes, 1, n_heads, 1, d])
        K = self.W_K(h_E).view([n_batch, n_nodes, n_neighbors, n_heads, d, 1])
        V = self.W_V(h_E).view([n_batch, n_nodes, n_neighbors, n_heads, d])

        # Attention with scaled inner product => [N_batch, N_nodes, N_heads, K]
        attend_logits = torch.matmul(Q, K).view(
            [n_batch, n_nodes, n_neighbors, n_heads]
        ).transpose(-2, -1)
        attend_logits = attend_logits / np.sqrt(d)

        if mask_attend is not None:
            # Masked softmax; the mask is shared by all heads.
            mask = mask_attend.unsqueeze(2).expand(-1, -1, n_heads, -1)
            attend = self._masked_softmax(attend_logits, mask)
        else:
            attend = F.softmax(attend_logits, -1)

        # Attentive reduction, then merge heads and project.
        h_V_update = torch.matmul(attend.unsqueeze(-2), V.transpose(2, 3))
        h_V_update = h_V_update.view([n_batch, n_nodes, self.num_hidden])
        h_V_update = self.W_O(h_V_update)
        return h_V_update

    def step(self, t, h_V, h_E, E_idx, mask_attend=None):
        """ Self-attention for a specific time step t

        Keys/values are built from the edge features of position ``t``
        concatenated with the features of its neighbor nodes, so
        ``N_edge + N_hidden`` must equal the layer's ``num_in``.

        Args:
            t:              Position to update
            h_V:            Node features           [N_batch, N_nodes, N_hidden]
            h_E:            Neighbor features       [N_batch, N_nodes, K, N_edge]
            E_idx:          Neighbor indices        [N_batch, N_nodes, K]
            mask_attend:    Mask for attention      [N_batch, N_nodes, K]
        Returns:
            h_V_t:          Node update             [N_batch, N_hidden]
        """
        # Dimensions
        n_batch, _, n_neighbors = h_E.shape[:3]
        n_heads = self.num_heads
        # Integer division: view() rejects the float that `/` yields on Python 3.
        d = self.num_hidden // n_heads

        # Per time-step tensors
        h_V_t = h_V[:, t, :]
        h_E_t = h_E[:, t, :, :]
        E_idx_t = E_idx[:, t, :]

        # Single time-step
        h_V_neighbors_t = gather_nodes_t(h_V, E_idx_t)
        E_t = torch.cat([h_E_t, h_V_neighbors_t], -1)

        # Queries, Keys, Values
        Q = self.W_Q(h_V_t).view([n_batch, 1, n_heads, 1, d])
        K = self.W_K(E_t).view([n_batch, n_neighbors, n_heads, d, 1])
        V = self.W_V(E_t).view([n_batch, n_neighbors, n_heads, d])

        # Attention with scaled inner product => [N_batch, N_heads, K]
        attend_logits = torch.matmul(Q, K).view(
            [n_batch, n_neighbors, n_heads]
        ).transpose(-2, -1)
        attend_logits = attend_logits / np.sqrt(d)

        if mask_attend is not None:
            # Masked softmax
            # [N_batch, K] => [N_batch, N_heads, K]
            mask_t = mask_attend[:, t, :].unsqueeze(1).expand(-1, n_heads, -1)
            attend = self._masked_softmax(attend_logits, mask_t)
        else:
            # Logits are already scaled above; do not divide by sqrt(d) again.
            attend = F.softmax(attend_logits, -1)

        # Attentive reduction, then merge heads and project exactly as
        # forward() does, so the result can be added to h_V[:, t].
        h_V_t_update = torch.matmul(attend.unsqueeze(-2), V.transpose(1, 2))
        h_V_t_update = h_V_t_update.view([n_batch, self.num_hidden])
        h_V_t_update = self.W_O(h_V_t_update)
        return h_V_t_update
|
||||
|
||||
class Struct2Seq_decoder(nn.Module):
    """Autoregressive sequence decoder conditioned on graph features.

    Node and edge features are embedded linearly, sequence tokens through an
    embedding table, and a stack of decoder layers updates the node states
    under an autoregressive neighbor mask. The output is per-position
    log-probabilities over ``num_letters`` classes.

    NOTE: ``encoder_layers`` are constructed (so they appear in the module's
    parameters) but ``forward`` does not apply them.
    """

    def __init__(self, num_letters, node_features, edge_features,
                 hidden_dim, num_encoder_layers=3, num_decoder_layers=3,
                 vocab=20, k_neighbors=30, protein_features='full', augment_eps=0.,
                 dropout=0.1, forward_attention_decoder=True, use_mpnn=False):
        """ Graph labeling network

        Args:
            num_letters: Number of output classes.
            node_features: Width of the raw node features V.
            edge_features: Width of the raw edge features E.
            hidden_dim: Width of all hidden embeddings.
            num_encoder_layers: Number of encoder layers to construct.
            num_decoder_layers: Number of decoder layers.
            vocab: Size of the input token embedding table.
            k_neighbors, protein_features, augment_eps: Accepted but not used
                by this class.
            dropout: Dropout probability passed to every layer.
            forward_attention_decoder: If True, positions excluded by the
                autoregressive mask still receive sequence-free structure
                information.
            use_mpnn: Use MPNNLayer instead of TransformerLayer.
        """
        super(Struct2Seq_decoder, self).__init__()

        # Hyperparameters
        self.node_features = node_features
        self.edge_features = edge_features
        self.hidden_dim = hidden_dim

        # Embedding layers
        self.W_v = nn.Linear(node_features, hidden_dim, bias=True)
        self.W_e = nn.Linear(edge_features, hidden_dim, bias=True)
        self.W_s = nn.Embedding(vocab, hidden_dim)
        layer = TransformerLayer if not use_mpnn else MPNNLayer

        # Encoder layers
        self.encoder_layers = nn.ModuleList([
            layer(hidden_dim, hidden_dim*2, dropout=dropout)
            for _ in range(num_encoder_layers)
        ])

        # Decoder layers
        self.forward_attention_decoder = forward_attention_decoder
        self.decoder_layers = nn.ModuleList([
            layer(hidden_dim, hidden_dim*3, dropout=dropout)
            for _ in range(num_decoder_layers)
        ])
        self.W_out = nn.Linear(hidden_dim, num_letters, bias=True)

        # Initialization
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def _autoregressive_mask(self, E_idx):
        """Return a float mask that is 1 where a neighbor index precedes its node.

        Args:
            E_idx: Neighbor indices [B, N, K].
        Returns:
            float32 tensor [B, N, K]; entry (b, i, k) is 1 iff E_idx[b,i,k] < i.
        """
        N_nodes = E_idx.size(1)
        # Build the position index on E_idx's device to avoid CPU/GPU mixing.
        ii = torch.arange(N_nodes, device=E_idx.device)
        ii = ii.view((1, -1, 1))
        mask = E_idx - ii < 0
        mask = mask.type(torch.float32)
        return mask

    @staticmethod
    def _placeholder_graph(S, hidden_dim):
        """Build all-zero embeddings and a chain graph when no structure is given.

        Every position gets a single neighbor: its predecessor (position 0
        points at position 1, or at itself for a length-1 sequence).

        Args:
            S: Token tensor [B, N]; only its shape and device are used.
            hidden_dim: Width of the zero embeddings.
        Returns:
            (h_V [B,N,hidden_dim], h_E [B,N,1,hidden_dim], E_idx [B,N,1] long)
        """
        n_batch, n_nodes = S.shape[0], S.shape[1]
        h_V = torch.zeros(n_batch, n_nodes, hidden_dim,
                          dtype=torch.float32, device=S.device)
        h_E = torch.zeros(n_batch, n_nodes, 1, hidden_dim,
                          dtype=torch.float32, device=S.device)
        prev = torch.arange(n_nodes, device=S.device) - 1
        if n_nodes > 0:
            prev[0] = min(1, n_nodes - 1)
        E_idx = prev.view(1, n_nodes, 1).repeat(n_batch, 1, 1)
        return h_V, h_E, E_idx

    def forward(self, V, E, E_idx, S, L, mask):
        """ Graph-conditioned sequence model

        Args:
            V: Node features, or None to decode without structure (zero
                embeddings and a chain graph are used; E and E_idx are ignored).
            E: Edge features for each node's neighbors.
            E_idx: Neighbor indices [B, N, K].
            S: Token indices [B, N].
            L: Not used by this method.
            mask: Node mask [B, N].
        Returns:
            Log-probabilities over output classes for every position.
        """
        # Prepare node and edge embeddings
        if V is None:
            h_V, h_E, E_idx = self._placeholder_graph(S, self.hidden_dim)
        else:
            h_V = self.W_v(V)
            h_E = self.W_e(E)

        # NOTE: the (unmasked self-attention) encoder pass is skipped here;
        # h_V goes straight from the embedding to the decoder.

        # Concatenate sequence embeddings for autoregressive decoder
        h_S = self.W_s(S)
        h_ES = cat_neighbors_nodes(h_S, h_E, E_idx)

        # Build encoder embeddings (sequence channels zeroed out)
        h_ES_encoder = cat_neighbors_nodes(torch.zeros_like(h_S), h_E, E_idx)
        h_ESV_encoder = cat_neighbors_nodes(h_V, h_ES_encoder, E_idx)

        # Decoder uses masked self-attention
        #   mask_attend : autoregressive mask
        #   mask        : input mask, to mask the nodes/edges with nan values
        #   mask_bw     : applies both masks together
        mask_attend = self._autoregressive_mask(E_idx).unsqueeze(-1)
        mask_1D = mask.view([mask.size(0), mask.size(1), 1, 1])
        mask_bw = mask_1D * mask_attend

        if self.forward_attention_decoder:
            # mask_fw : complement of the autoregressive mask (still gated by
            # the input mask); those neighbors contribute sequence-free features.
            mask_fw = mask_1D * (1. - mask_attend)
            h_ESV_encoder_fw = mask_fw * h_ESV_encoder
        else:
            h_ESV_encoder_fw = 0

        for layer in self.decoder_layers:
            # Neighbors allowed by mask_bw expose sequence + structure
            # features; the others only expose the encoder (structure) features.
            h_ESV = cat_neighbors_nodes(h_V, h_ES, E_idx)
            h_ESV = mask_bw * h_ESV + h_ESV_encoder_fw
            h_V = layer(h_V, h_ESV, mask_V=mask)

        logits = self.W_out(h_V)
        log_probs = F.log_softmax(logits, dim=-1)
        return log_probs
|
||||
|
||||
|
||||
############## OUR UTIL FUNCTIONS #################
|
||||
# get node features
|
||||
def get_node_features(omega, theta, phi):
    """Build per-residue angular features from pairwise angle matrices.

    For each square matrix the first off-diagonals are taken and padded with
    one zero so they have the same length as the matrix side:

    * omega: super-diagonal, zero-padded at the front (omega is symmetric)
    * theta: super-diagonal padded at the back, sub-diagonal padded at the front
    * phi:   super-diagonal padded at the back, sub-diagonal padded at the front

    Args:
        omega, theta, phi: 2-D tensors of the same square shape.
    Returns:
        Tensor [N, 10]: sines of the five features followed by their cosines.
    """
    def pad_front(diag):
        # new_zeros keeps the pad on the same dtype/device as the data.
        return torch.cat((diag.new_zeros(1), diag))

    def pad_back(diag):
        return torch.cat((diag, diag.new_zeros(1)))

    # omega is symmetric
    n1 = pad_front(torch.diagonal(omega, offset=1))

    # theta is asymmetric
    n2 = pad_back(torch.diagonal(theta, offset=1))
    n3 = pad_front(torch.diagonal(theta, offset=-1))

    # phi is asymmetric
    n4 = pad_back(torch.diagonal(phi, offset=1))
    n5 = pad_front(torch.diagonal(phi, offset=-1))

    ns = torch.stack([n1, n2, n3, n4, n5], dim=1)

    return torch.cat([torch.sin(ns), torch.cos(ns)], dim=1)
|
||||
|
||||
def get_k_neighbors_idx(array, k):
    """Return the indices of the ``k`` smallest entries along the last axis."""
    _, smallest_idx = torch.topk(array, k, largest=False)
    return smallest_idx
|
||||
|
||||
def get_k_neighbors(dist, k):
    """For every row of ``dist`` return the indices of its ``k`` smallest entries.

    Rows are processed one at a time and the per-row index vectors are
    stacked, giving one row of ``k`` indices per row of ``dist``.
    """
    per_row = [torch.topk(row, k, largest=False)[1] for row in dist]
    return torch.stack(per_row)
|
||||
|
||||
def get_edge_features(dist, omega, theta, phi, E_idx):
    """Collect pairwise features for every (node, neighbor) pair in ``E_idx``.

    The six channels, in order, are: dist[i, j], omega[i, j], theta[i, j],
    theta[j, i], phi[i, j], phi[j, i] for node i and neighbor j = E_idx[i][k].

    Returns:
        Tensor with the channels stacked on axis 2 (one row per entry of
        ``E_idx``, one column per neighbor).
    """
    node_ids = range(len(E_idx))

    def row_lookup(matrix):
        # matrix[i, j] for each neighbor j of node i
        return torch.stack([matrix[i, E_idx[i]] for i in node_ids])

    def col_lookup(matrix):
        # matrix[j, i] for each neighbor j of node i (reverse direction)
        return torch.stack([matrix[E_idx[i], i] for i in node_ids])

    channels = [
        row_lookup(dist),
        row_lookup(omega),
        row_lookup(theta),
        col_lookup(theta),
        row_lookup(phi),
        col_lookup(phi),
    ]
    return torch.stack(channels, dim=2)
|
||||
|
||||
def get_mask(E):
    """Flag entries of ``E`` whose features are all finite.

    ``E`` is summed over axes 1 and 2; an entry gets 1.0 when that sum is
    finite and 0.0 otherwise (NaN or infinite).

    Returns:
        float32 tensor with one value per index along axis 0 of ``E``.
    """
    totals = np.sum(np.array(E), (1, 2))
    is_valid = np.isfinite(totals).astype(np.float32)
    return torch.tensor(is_valid)
|
||||
|
||||
def replace_nan(E):
    """Overwrite every NaN in ``E`` with 0 IN PLACE and return ``E`` itself."""
    nan_positions = np.isnan(E)
    E[nan_positions] = 0.
    return E
|
||||
|
||||
def get_S_enc(seq, tokenizer):
    """Tokenize ``seq`` with ``tokenizer.tokenize`` and wrap the result in a tensor."""
    token_ids = tokenizer.tokenize(seq)
    return torch.tensor(token_ids)
|
|
@ -0,0 +1,386 @@
|
|||
>seq1
|
||||
SISTRIGEYRSAQSKEDLIQKYLNQLPGSLCVFFKFLPSVRSFVATHASGIPGSDIQGVGVQLESNDMKELSSQMAIGLLPPRFTEMLVEAFHFSPPKALPLYAHNALEGVFVYSGQLPAEEVARMNEEFTLLSLCYSHF
|
||||
>seq2
|
||||
AFPMRIADYRSAQSKEDMIQRFLNGISGSRCLFFKFLPSVRSFVATHANGIDAAQIQGVGSQLTSDEMKDLGSLLAMGLIPEKFSSMLVEAFHLNPPKAIPVYANAHLEGLFVYSGGMDKKAIEQLNDEYTLFNLCYSHF
|
||||
>seq3
|
||||
AISEVINDYRIAESKEDIIRMLFQNLSNLPLLFFKFLPSMNSFVMSHASMPNHQVYEGLGSALNPEETKDLVKQILLNIVPASFNQVISNMFMFQRPSMIPLFDRENLEGVFVFDQESANSLIDQMQDYVSATSLYYSLY
|
||||
>seq4
|
||||
HVSMRITDYRSAQSKEDLIQKYLNHLPNALCIFFKFLPSVRSFVATHAQGIPASDIQGVGVQLETGDTKDLTTQMAMGLLPARFNDMLVEAFHFNPPKALPLYAHHALEGVFVYSGNITPAEASQLAEEFSLLSLCYSTF
|
||||
>seq5
|
||||
FISPRIGDYRSAQSKEDMVQKYLNNLPQMVCLFFKFLPTVRSFVATHATGIPASDIQGVGVQLDSADMKELNSQMAMGLLPSRFMDMLTEAFHFRPPKALPLYAHNNLEGVFVYSGEVNKQLSTAMTEEFSLMSLCYSNL
|
||||
>seq6
|
||||
KVSMRITDYRSAQSKEDLIQKYLNHLPNSLCIFFKFLPSVRSFVATHAQGIPASDIQGVGVQLESSDVKDLTTQMAMGLLPARFNEMLVEAFHLSPPKALPLYAHHALEGVFVYSGSISAAESAQLAEEFSLLSICYSAF
|
||||
>seq7
|
||||
SIGQKIQLYSAATTKDEVLIHFINQLSC-QAIYFKYLPTVQSFVATHSHGLDIEAIKGVGTRLEATETAHLLDLLKSGEIPPALAELMKEGLRIGqyfpKPVIVQNSTLTGLDGLFIFWGAEGF-HFQQIESDFLIFSLLYQQA
|
||||
>seq8
|
||||
AISSRIATYKVAESKEDMLHKFLQHIPKeTLCIYFKYLPSVRSFVATHGWGIPNSDIQGVGVQLEAEDMRTLSERVTMGQLPERFSKMLKEAFGFNPPKALPLYAYHSLEGVFVFSGSLDAKYVAEINEEFTLLSLCYSNF
|
||||
>seq9
|
||||
PFQTRIAEYRAAESKEELVQMFFRQTASQSWAFLKYVSSIQTYISVSSQNMPDEWVEGLSYKI-PSSQSDFNDKILLGEYPESFLQYIKAKWGVDTLKVMPLLLKNEIEGLLITPQDI----SAEVAEDFSLMSLVYNLI
|
||||
>seq10
|
||||
PFQMRIAEYKMSESKEELLQKFYQQSPKQSWVFLKYIKSIQTYISVSHQNMEPSWVEGLSFKI-PTDVQEFNSRVFVGDFPDSLISYIKTKWDVSNLKILPLTFKDEIEGLLISTQDI----SADVAEDFSLMSLVYQLM
|
||||
>seq11
|
||||
-VSMRITDYRTAQSKEDLIQKYLNHLPGnTVCVFFKFLPSVRSFVATHALGVPASDIQGVGVQLESSDMKDLSAQMAMGLLPPRFTEMLVEGFHFNPPKALPLYAHNVLEGIFVYSGSLPAEDTAVMNEEFTLMSLCYANF
|
||||
>seq12
|
||||
-FSYKIAEYKTAQSKEDLLQKFLNNLEHTLCVYFKFLPSVRSFVATHANGIPAASIQGVGCQLESDDMKELGSQLALGLLPERFSSMLVEAFHFNPPKALPLYANNILEGVFVYSGLLGGTAAEMMGEEFSLFSLCYSHF
|
||||
>seq13
|
||||
-VSMRIADYRSGQSKEDLIQRFLNNAPESLIVYFKFLPSVRSFVATHAKGAEGSSIQGVGCQIEMSEARDLGSQMAMGEIPPTFVSMLKEAFHFNPPKALPIYSHSGLEGVLIYSGDMDKLMAHKLQEEFSIFSLCYSF-
|
||||
>seq14
|
||||
-VSVRLADYRTVQSKEDLIQRYLNHLPTqVGSVFFKYLPSVRSFVATHGAGIPAGDIQGVGVQLESSDVKDLPSQLALGMLPDRFANMLVEAFQFNPPKVLPLYAHNNLEGVFVYSGTTSPADITFLSEEFALMSLCYVN-
|
||||
>seq15
|
||||
--SMRIADYRSGQSKEDLIQKFLNNAPESLIIYFKFLPSVRSFVATHGKGIEGSSIQGVGCQIEMQEARDLGGHLSMGQIPASFVSMLKEAFRFNPPKALPVYSHAGIEGVVVYSGDIDKLVLHKLQEEFAVFSLCYSY-
|
||||
>seq16
|
||||
-FQMRITDYKLAESKEELLQKFFSQTPTQSWVFLKYIKSIRTYIAVAHQNMEENWVEGLSFKI-PGDEEKFNQQIMIGNFPESLTDYLKGKWDIQALKVVPLILKDQIEGLLVTPQDI----SAEVAEDFSLMSLVYQV-
|
||||
>seq17
|
||||
-FQMRISEYRTAESKEDLLQTFFKQTPTQAWVFLKFIKSIQTYISVLHQNMPESWVEGLSFKI-PIQETQFNQKIMVSDFPSSFLNYIKSKWNVEHVKILPLIIKNELEGLLVSTQDIDG----NVAEDFSLMSLVYA--
|
||||
>seq18
|
||||
----LIGDYRLSNSKEDIVQKFISSLQKTSCVYFKFLPSVRSFVATHSNGIDARLLKGVGSQLGKEESKNLTSTLSLGEVPPSMKDLLTEGFHFQSPKILPLFIQNQLDGLIAYDGKIDRNELQDFHERFSLFSLVYSHY
|
||||
>seq19
|
||||
-VSNRVADYRSAQSKEEVLQKYFERLGKIPAVYLKFLPSVRSFVATHASGFPPSHIQGVGCQLENNDLESLNTQITVGLLPPLMNELLQKVFHFASPRVVPLFVQNQLEGAVVYNGNLSKAESLRVGEEFSLFALCYSY-
|
||||
>seq20
|
||||
-FQTRIAEYRSADSKEDLLQKFFSQTPQQSWAFLKYVKSFNSYILVSSQNMPEDWIQGVSFKI-PNSEPDFNKKVTVGEFPASFLNYLKRKWEVEIIKVLPLLLKDDVEGLLVTTQDI----SPEVAEDFSLASLMYNL-
|
||||
>seq21
|
||||
-FQMRISEYRAAETKEELIQLFFKQTPQQSWAFLKYAPSIQTYISILSQSMPDSWVEGLSYKV-PSAMKDFNQQIMLGEYPAVLTEYICAKWGVKTVKMMPLILKDEIEGLLVTPQDI----SAEVAEDFSLMSLVYNL-
|
||||
>seq22
|
||||
-ISEVLNDFRIAESKEDIIRMLFQNLTNMPLLFFKFLPSMNSFVMSHASMPNHQVHEGLGSALNPEETKDLIKQILLNIVPASFNQVVANMFAFQRPSLIPLFDRDSLEGVFVFDQESSHTLIDQMQDYVSATSLYYSL-
|
||||
>seq23
|
||||
-FQMRIAEYRVAESKEELIQKFFKQTPAQAWVFLKFVKPIQTYISVSHQNMPEAWVAGLSYKI-PINQPDFNEHVIIGSYSESFLKYIKTKWSVDNVKIFPLIFKNEIEGLFVSPQDI----SAEAAEDFSLMSLVYSL-
|
||||
>seq24
|
||||
-FQSRIAEYRSADSKEELLQKFFSQTPQQSWAFLKYVKSISSYILVTSQNMPETWIQGLSYKV-PATDAEFNKKIMLGEFPDNLLNYLKRKWEVDIVKILPLLLKDDVEGLLVTTQDI----SSEVAEDFSLASLMYNL-
|
||||
>seq25
|
||||
-FQMRINEYKSADSKEDLLQIFFRQTPLHSWVFMKYVPSISTYIAVANQNMPQSWIEGLSFKVSALE-TEFNNKVAVGDFPLSLTYYLKSKLEVETVKILPLVIKNDVEGILITTQDI----PAEVAEDFSLMSLNYAL-
|
||||
>seq26
|
||||
----------------------------SMCIFFKFLPSVRSFVATHGNGVEGSQIQGVGCQLEADDTKDLNSQLSVGLLPARFNAMLVEAFHFNPPKGLPLYGAQNLEGVFVYSGSLDKNAAAQLNEEFALFSLCYSH-
|
||||
>seq27
|
||||
----RITDYKSADSKESLVQKYLQRSESTPMVFFKFLTSVRSFVVTQSVHMDVERIQGLGAQLSPGELKDLNAQLSVGLLPPSLLKMVNEAVHLQEFRVWPLYVHHQLEGVVICQVGE---ETTDLHEEFSLFGVMYSHF
|
||||
>seq28
|
||||
-----IKDYRSAASKEELLRRFVQVAGKTACVFLKYLPTVRSLVVTNASVFDLDHLQGLGCQLQPNEAKDFGSQVALGIVPPSLHDLLRQAFQFQKSRLLPLFIQDRLEGVVAYSTQIAPSEKMRLDDEFALMSLAYTA-
|
||||
>seq29
|
||||
--QLRIAEYKSATSKEDLLNVFYNQTPTQSWIYLKFVPSIETFICVSYSQVPEDWVEGLSYKVATKDRDNFMSKLFSGALPPNLGNYLKNKFGTDRIKFLPMIIRDKIEGILISTQEI----SAEVAEDFSLMSLVYT--
|
||||
>seq30
|
||||
-ISAKIAFYKACVNKDEVVLAFFRRLSC-KAIFFKYLPTVNSFVALSAQGVDVEDIKGVGSRLEPSESKDLPKQLSEGVLPQALVQILKEGLHVSRFEWKPLLVQNWVEGVLVFWGDEKF-QFASIENEFLIFDLIYQKM
|
||||
>seq31
|
||||
--QIRISEYKSCQSKEELLDVFYKQAETQSWVYLKFIPTIETFISVSNHQVPEYWVEGLSYKV-PASNKGFMDQIFQGVLPESFERYLIQKFNVKQIKFIPLIIKNQVEGLMISTQDI----TADSAEDFSLMSLVYTN-
|
||||
>seq32
|
||||
--QLRIADYKSASSKEDLLNTFYSQTPAQSWVYLKFVPSIETFLCVSYAGVPEEWVDGLSYKV-NSKDKDFMSQLLMGSLPLSIATYFKSKFGTDHVKYLPMIIRDKIEGILISTQEI----SAEVAEDFSLMSLVYTL-
|
||||
>seq33
|
||||
--QSRISTYRLAESKEQLLDQFYQATPTQTWIYLKLVPTIQTLICVSSANCPEDWSEGLSYKI-PTREKNFSDLLISGQMPEGLLSYLKVKLAVDKIKFLPLIIKQAVEGVLVSTQDI----SAEVAEDFSLMSLVYSN-
|
||||
>seq34
|
||||
--QTRIALFKGAESKENLLDLFYQQTPEQSWVYLKFAQTIQTFICVSYANVPENWIEGLSFKV-PIKEKNFLEQISLGALPESLSNYLTQKFGVERVKFLPLMLRDSLDGILISPQDI----SAEVAEDFSLMSLIYTN-
|
||||
>seq35
|
||||
--QNRIAIYRLAESKEQLLDQFYQATPTQTWMYLKLVPTIQTLICLSSANSPASWTEGLSYKI-SNKETSFLEQILNGLLPENLHSYLKNKFDIDKVKFLPLIIKQNIEGLLISTQDIDA----NVAEDFSLMSLVYTN-
|
||||
>seq36
|
||||
--------------------------------------SLRSFVATHGNGILSSEIKGVGVQLESEDLKDLASNLAMGLLPARFNEMLVAAFQFNPPKALPLYAHNSLEGVFVYAGNMSATEIKNISEEFSLMSLCYSNF
|
||||
>seq37
|
||||
--QSRISTYRLAESKEQLLDQFYQATPTQAWFYLKLVPTIQTLICVSSANCPEEWSEGLSYKI-PAREKGFSEQLLSGIIPEGLLSYLKVKLGVDKIKFLPLIIKQSVEGLLISTQDI----SAEVAEDFSLMSLVYSN-
|
||||
>seq38
|
||||
-------------------------LEKIPAVYFKFLPSVRSFVATHASGFNANQIQGVGCQLENQDLEGINTQVSVGLLPPLMLEMFRKVFHLENPKVLPLFVQNAMEGIVVYSRELSKADSFRVGEEFSLFSLCYS--
|
||||
>seq39
|
||||
--QTRIADYKLAESKEELLNTFYRNTPDQTWIYLKYIESIQTFMGISSHLAPESWVEGLSFKI-PRGQSEFNQTIARGGLPKDFLNYLTEKFDTARIKTLPILLRDKVDGLLVTTQEIPI----EVSEDFSLFSVIYA--
|
||||
>seq40
|
||||
--AEKMRVYQGAQSKDDYLAVFLQHLPC-HAIYFKFLLTVNSFVATASQKLEIESIKGVGSRLTADEVKSLVEDLEAGRLPASLKELMNEGLKVPRYYSQPVPVHRGLDGLLVFWGDAEF-QYQDIENDFLIFHMLYQQ-
|
||||
>seq41
|
||||
-FQTRIAEYRSAESKEDLLQKFFAQTPQQSWAFLKYVKSINSYILVSSQNMPESWIQGVSYKI-QNAEADFNKKVIVGEFSQNFLNYLKRKWEV----------------------------------------------
|
||||
>seq42
|
||||
--TSHVLKYQQASNQEELLQTFMNSLGAIHSIYFKFLPTVNSFVATLSHGIDIESVKGAGSRLTDAESKDVHEFLASGQIPEALRALMQEGLKIVQFISQPVLLYRSLDGLFIFWSSENF-NFSLIENQFQIFQLVYQN-
|
||||
>seq43
|
||||
------------------------QAPTQSWAFIKYVKTIQTYVSMSSQNMPGDWVEGLSFKI-PTDQSDFNDKLIVGSYSDSFLDYIKNKWGVKTVKILPLLHKNEVEGLLVTPQDV----SAEIAEDFSLMSIVYSL-
|
||||
>seq44
|
||||
--KELVQKYKATDSKEDLLGQYLGKLKF-QALFMKYLPTVQSFVATYSQGIDLDSIRGVGARLNTEESARLNELTQNEYLPPSLREVIEKGLGVRQFAVKSVTGATGLDGVFVFWSDHNF-DFSQHEDEFLIFQLLYQN-
|
||||
>seq45
|
||||
----FISDLRAAQSKEEMISTMLRESKETPLVYLRYLPSMASFLVTDSSYANVENFKGLGCRLTAEENKDLAKQLELAIVPPSLAELLMKAFRMNSPRIRSLMSGSVLEGILVGDASS-AESTHYLNERFAIMSLVYSHF
|
||||
>seq46
|
||||
--SEEFKKYRNATSKEDVLQIFLKEINQtflaknnkLSSLYFKFLPSVQSFVATQSIGVDIDSVKGVGGKFTEQDSKDPLELIKNGHVPSMIQELMKEGFSTEDFIFKPVFFDQMLDGFFIFWSNTQKVYSEEFENYFTLFLLFYE--
|
||||
>seq47
|
||||
-VREKINAYQKALAKDDFVQTFLEQLPC-NAIYFKWLPSVVSFVATCSKGLDIESLKGVGSRMTLEESRSVNEFLESGKLPEALNELMQAGLNIKNYFSQLIPVYHGFDGLIVFWGDENF-HFEQIENDFLVFRMLYQQ-
|
||||
>seq48
|
||||
----FISDLRAAHSKEELIATLLREARGTPLVYLRYLPTMGSFLVTDTSYEPPEEFKNLGCRLAPEENKDLVRQLELALVPPSLGELLSQAFRIQNPRVRPLLNGAELEGILVGDANS-PEITHELNERFAIMSLVYSHF
|
||||
>seq49
|
||||
-VSHRVADYRSSQSKEEVLQKYFERLDKVQAVYFKYLSSVRSFVATHAAGFTSQQIQGVGCQLEGADLESLNTQIAVGLLPPIVMEMLQKVFNIQT--------------------------------------------
|
||||
>seq50
|
||||
----------------DLVQKFVSSLQKTSCVYFKYLPSVRSFVATHSSGIDARLLKGVGSQLGAEESKNLNTSLGMGEIPKSLKELLGEGFHFQQPRVLPLFVQSQLDG------------------------------
|
||||
>seq51
|
||||
--TEKANAYEDLTTKDEYLSAFLLRIPC-RAIYFKFLPSVNSFVAVSGHGIDIESIKGIGARMSPDESKDTLQFLQKGALPESLREIIVEGLKISQYLVKPVPLYRSLDGLFVFWGDEGF-DYREIENEFVLFHLFLQ--
|
||||
>seq52
|
||||
-FQTRIAEYRSADSKEDLLQKFFKQTPSQSWAFLKFVKSINSYILVSSQQMPEEWVQGLSYKI-PNSEKDFNDRMMIGEFSDGFLTYIK---------------------------------------------------
|
||||
>seq53
|
||||
-IKDFIMDLQAAESKEELLQVFLRESANLSLVYLKYLPSMSSFVVTHTAHLPLEKTEGLGCRLSPEENKDLLKQFSLGLVAPSLMELLEKAFQIKSPRVRPLFDRQALD-------------------------------
|
||||
>seq54
|
||||
-FQMRIADYRVAESKEELIQIFFKQTPTQSWAFLKFVKPIQTYISVLQQNMPEAWVEGLSYKI-PVNQTDFNDNVVIGSYPEAFLKYIKNKWD-----------------------------------------------
|
||||
>seq55
|
||||
--QLRIAEYKSCESKEELLDIFYKQTDQQSWVYLKYIPTIETFISVSQHEVPDNWVEGLSFKV-PAQDKDFLSQIFQGNLPPALENYLLGKFKVPQIKFIPL--------------------------------------
|
||||
>seq56
|
||||
--QLRISDYKLAESKEELLDLFYKNTPEQSWVYLKFIESIQTFMGISSHLIPDSWMEGLSFKV-SKNQKDFMNLVRQGELPKNFLQYLSQKFDTAHLKFLPVLLRDKVDGV-----------------------------
|
||||
>seq57
|
||||
----LIQKFSRASSKEEFVDVYFKYVSErksgapFSAVYFKYLPSVVSFIATQGHQINLERTKGIGLKLMGDEAGHLAEQLQNQNLPMGFQNLLQEAFQIQDWTVYPLFLKDQVEGVVIFWGLE---LTPGDWEEFLLFQLCYQN-
|
||||
>seq58
|
||||
--EEKIQLYRYAVTKDDYLEAFLRQIPG-RAIYFKFLPTVGSFVSTFAQGLNLDDLKDVGVRLTFEESKDVDTFFKEGGTPVALKELLAEGLHVDGYMTKPVFSINTLEGIFVFWNFNAA-Q------------------
|
||||
>seq59
|
||||
-----------CQSKDDYVLAFLKQLSC-NAIFFKFLPTVSSFVATAAQGLDIETMKGVGSRMSLDESKDVKAFLSSGQVPQALNELMTDGMKVPQFYSHLVSVPQGPEGLLVFWDGQ-RSGPPAIEDDFLIFQLLYQQ-
|
||||
>seq60
|
||||
--------------------------------FLKYVKSINSYISVSSQNMPESWVEGLSYKV-PNNETDFNQNMLVGIYSEHFLNYLKRKWSVDIVKVLPLTLKDQIEGLLVTPQDIKG----EVAEDFSLVSLVYS--
|
||||
>seq61
|
||||
-FNDEIKKYRKAANKEEVIADFLKDLNQkflsrnqrLFAIFFKYLPSVYSFVSLQSLGLDVESLKGVGFRLTPEEARQPAELFAEGKMPSQLTQLLQEGLKVPSPLVCPVLVQGKPEGYFCFWTNSGGLSLESIANELSLFQVLFE--
|
||||
>seq62
|
||||
--------------------------------FLKFVKSINSYILVSSQNMPEGWIQGVSFKI-PGSEPDFNNKVIIGEFSPSFLNYLKRKWEVDIVKVLPLVLKDEVEGLLVTTQDI----SPEVAEDFSLASLMYNL-
|
||||
>seq63
|
||||
-VAERIRNYLSVESKEDLLSRWMLGLGEKPCAYLQYLPSVRSLVVTHGTL-PES--QGVGCQLTPAEAQDFATQVALGVVPPTLDDLLKKAFKFSSVRLLPLFTQGKLEGVA----------------------------
|
||||
>seq64
|
||||
------------------------------CVYFKFLPTVMSFVATNGYHVDLERTKGIGAKMTPDETKTLLATILERKMPPSLHTLMAEAFRVSQSFLYPLLLKNQIEGVFVFWGVAESVFPRSFGAEFSLFQLCYQN-
|
||||
>seq65
|
||||
--GREISAYANAKTREEMTDVFLQQMQQkclrrnlnLRALILRFLPTVQSFVATQSLGLDIDKLRGIGARLEKDEAEKLTELARAGQHPKQLLQLLQQGLALDNFQVQPMILRDHVEAYFVFWVESGVLQKTDFENEFTTYSLLYY--
|
||||
>seq66
|
||||
-FQEKIRLFDAVESKDETIHTFFRALPC-RAIFFKFLPTVNSFVATMAKGLDIESVKGVGSRLKSDEIEKLQKILQSGALPSALEDLMTN--------------------------------------------------
|
||||
>seq67
|
||||
-------------SKEEMIWSFLSHLPC-RGIFFKFLPTVQSFVATQAHGLDIDXXXXXXSRLTQEEARDLDQLLQKKSLPKVLDELMKDGLRVPSYQSHVLRVQRHLEGLFVFWGLE-AEGAAQVENSLLIFQLAYENF
|
||||
>seq68
|
||||
-IQDRLNEYRSAESLEILVQRFLGQFQGRLVIYFKYIEAVRSLLALNAQGVEISKLAGLGLQVEVKDLQIFYDQLLLGLAPSKVISEVSELFGSTKLKSFPIYVFEKLQGFFITVDED----LEIFESDLSLVSLAYSH-
|
||||
>seq69
|
||||
-----------MQSKEELIRWVFSRIEArysqeskINGIFFKYLPTVSSLVATLTMGLGSDKAKGVGAKLTPEERKTLGSDLSGNLLPESLKNVLTSGFHVSEFSSFPVFVQNQVEGVFIFWPMQ-ISESDKI--ELNIFQLYY---
|
||||
>seq70
|
||||
---------------EGVVQTFLELSSElvekKPVLFLKYLPAHSALVAAQVAQMDPEKIKNVGFSVAQIDPKELNELFQHPEKIQQLTELMKVVFSQNEFSSLPFIYQNQVQGVFVLFGSFKSESDQKvFESYMQLMNVRYDN-
|
||||
>seq71
|
||||
--KNEVMVYAKAKSKEDILDLFLHHLEQkclrqnlkLNALVLKFLPTVQSFVATQSLGLDLEKVKGVGARLEKDEAQDLLGFLESGHFPKQLIHLLNQGLGVSNFVGKTLMLHDAIEALVVFWSSNGTLKAAH---------------
|
||||
>seq72
|
||||
----------------------------------KYLPTVKSLVVTHASATSAERLNGVGCQLAAEEARDFASQVALGMVPPSVSKLLSEAFAMNSASLWPLFLHRQLEGVVAFSREVDPRVMAALRDEFSLFSTLYAN-
|
||||
>seq73
|
||||
-----------SHTVEEATQDWLNKANLlfsgVRAAFFKYVPGHPYLMLTQCSGMELESVRGVGVNLTGLSPLEQKSFYSHTRFLMALKDLLKGAFNSDGFEYREIISDRGVLGIVVVFKTLEHEREKIfFNDSVEILNIVV---
|
||||
>seq74
|
||||
-ISNLLTLYENTRTREDLLQVFFHSLESyclegAKVLYFKYLEPVQLLVATHGCGVPVDDIKGAGIRLQPQEVLDAKNLLLSPRGFGSLNKLLTDIFHVDQCYVKPLIVREDIDGLFVFFGESLESfNSIRFSNRFSLFRVCFER-
|
||||
>seq75
|
||||
--GEELAKYGKATSKEEVLSLFFRELEAkfsrleipIKALFFKYLPTVQSFVAMQGLGLDLDSIRGVGGRLVNEESHDPEGFFAKGALPYELKMLLNELVGRDepqmQMKSLYISIRDQVDGLFVLWGSSPQIKWQVIQNEFAFFNLLYE--
|
||||
>seq76
|
||||
-----LSSFHNATNSDELLQIMVNFISQSKVIFLKYFEGIQSFVGFQSNFGSAEEIQSIGCQLKPPESNELAKQMSLGIVPVTLKELSQKVFAFSQPQFWPLVIEiGKVEGVMISEGGGDEALKADILNRWSLFSLVYKNF
|
||||
>seq77
|
||||
---DEVKIFEGAHTKSDLMALFLRRLSDlmreqqvgLKAIYFKYMPSVQSFVVMQSLGIDENSIQGLGGKLSPEEVKSFPDLFQQGTIPEQIQLLLEQGLKIQQFQSQVLWLNEILDGFFIFWSDQMRLETQFYSAQFLIFKMI----
|
||||
>seq78
|
||||
---------ELCGSVEEAIEACMFQISNwaesAALLFFKYIPGHPYLVLAQSKGIDSDKYRGVGVSMTGLAFSELGHFSEHGRFLSQLDSVMKGAFKAEKYTFSEVKSEQGILGIVVLLKSLESRAQKRfFEDCISVLNLTSQK-
|
||||
>seq79
|
||||
-FKEFISDLRAAQSKEELIATMLRETKETPLVYLRYLPTMASFLVTDTSYTPAENFKGLGCRLTPEENKDLAKQLELAIVPPSLAELL----------------------------------------------------
|
||||
>seq80
|
||||
-ISNLLTLYENTRNREDLIQVFFHSMEEycqegAKVLYFKYLEPVQLLVATHGSGIPVDQIKGAGIRLLPEEALEAKNLLISPRGFGSLNKLLLDIFHIEQCYVKTLFVREDIDGLFVFFGDSLEvFNTIKFSNRFSLFRVCFER-
|
||||
>seq81
|
||||
-----------------------------QGLYLKYLPTVHNLVATRALGLPIEKLKGVGAKLTPEEVQQLDLTVARHEVPPSVKALMVEGFHVPEFAPRGLLVHRGVDGLFVFWSKTSF-DIDLLDNEFMTFSQAYQM-
|
||||
>seq82
|
||||
---------------DHVVQMLLDAISQviqnKPVLFFKYLPQHSSLITSHASKIPVEKIKNLGINLSQVEATKIPEMLLHPLTIPGLPDLMKEVFQVPSYHAIPFVHQNQ---------------------------------
|
||||
>seq83
|
||||
---------------------------------FKYLPAYSSLVTSHASKIPIEQIKNLGVNLNQFQPLEIPEMLLRAQEMPGLIDLMKEVFKVSTFSALPFVYKNQTFGIAVIFDTLQNQSTKRLVESFL---------
|
||||
>seq84
|
||||
-----LKIYDGLISKSELTATFLRRMGEilraqklsLKSIYFKYMPSVQSFVVMQSLGLDEATVQGLGGKLSPEEIKSLKDLFQIKKIPEQIQLLMEQGLKVHVYQTQVLWTQDILDGFFVFWSDRVNLEPNFCQAQFLI--------
|
||||
>seq85
|
||||
--------YKTCETKEQVINLFFEQMEGLKVLFFKHLPTVHSLLVTHSSGFSHEDVQGIGCQLQDHEHKDLTSQLTLGVCPGPLALLL----------------------------------------------------
|
||||
>seq86
|
||||
-----IKEFRAVATQEELIQKLVVFLPPkSMMLYFKFLPSIQAFVATHCCGLPQEQIQGVGCRLSLEEQSTFSADVILGRLPLSLASLLEK--------------------------------------------------
|
||||
>seq87
|
||||
----------------EINQKFLAKNKKLTSVYFKYLPSVSSFVALQSLGIDIETLKGIGCRLTKEETEDSKTFFAQGGVPAELKVLLNEGLNAPQAIVKPIFVQEQLDGFF----------------------------
|
||||
>seq88
|
||||
----------------GVLQVFIEAVSDitdgKPVIFFKFLPAYSSLVANHAAKIPVEQIKNLGVNLSSFDTKSIPELLMQPVKLQPLLEMMKEVFQVQQFFALPFVYQNQPIGVIATFTPLSSDPVRRLLESFL---------
|
||||
>seq89
|
||||
--------------------------------------------------MPDEWVQGLSYKI-PNSDKDFNDRMMLGEFSDNFLNYIKRKWSVDIVKVLPLIIKNEIEGLLVTPQDI----TAEVAEDFSLVSLVYN--
|
||||
>seq90
|
||||
----------------------------------------------HASGFTSQQIQGGGCQLQGADLENLNTQLSVGLLPPIMMEMLTQIFHIKTARVLPLFVQNQLEGVVVYSAEAGKADTLRIGEEFSLFALCYSY-
|
||||
>seq91
|
||||
-VRQEIEKYRDSRSKEEVLNVFLREISQkflsrnkkISTIYFKYLPSVHSFVAVQALGIDIETLRGVGGKLTDEEAKDPKSFFLSGGIP-----------------------------------------------------------
|
||||
>seq92
|
||||
-LEERIKEYRSAAAKEDLLSKFMQIIAPIPCIYFKYLPTVKTLLATNASIFKPEQLQNVSCELRPEEAKDFANQI-----------------------------------------------------------------
|
||||
>seq93
|
||||
----------KSKNLDEVIQSFLDQTQNligKPLIFLTHMPSYLSFIASHAAGIEKAKIRNLGLNLKSIDSKQYLEKIADPQSLEQLKQLMTDIFKVPDYLALPMEEEGAITAYILVLGSVEDVSLRRLLDSFvNIFK------
|
||||
>seq94
|
||||
------------------------------------------------QNMPEVWVEGLSYKI-PANQTDFNENVLIGNYPESFLSYIKNKWGVDSVKVLPLIMKDEIEGLFISPQDITA----EMAED-----------
|
||||
>seq95
|
||||
----------GTKSLDETVQAFMDELSResqrFPVIYFKYLPAHASLAISQASHLAVDKFRGLGLDLKGLEPKALSSFFQSPEKSEKLEDLLKQAFRTESYTAFTHFNEGEALGLFVIFNRAKGEAHAILDSATH---------
|
||||
>seq96
|
||||
------------TEVDEVLKTFLSHTSKitgdAKVLYFKHLPAYTSLLLYMAEAIDVNDFKGVGVSLKDLYPVKYNEIILSPQKIESLKNMLSEVFKMNSALVIPLISESNVAGVIA-CEAIEDKSIRRVFDNFlQVLSLNYQN-
|
||||
>seq97
|
||||
---------------KEAIDLYLQEVSRflgeKQIVFFKYIPSHQSLVASQSVVADVNKVRGLGFELSKEEENFSLDQLHKPMTLEGLRRLMSSSLGIDEYFAQALVVQGVPEGVFVFVGSENIQAEPYIKSCVKSLG------
|
||||
>seq98
|
||||
-------------SKEEVLNIFFRELNQkflsrnlkISAIYFKYLPTVQSFVAMQTLGLDLESVRGVGGKLNAEESKDPDQFFAKGAIPQSIQILL----------------------------------------------------
|
||||
>seq99
|
||||
--------------VTQVVDIFLKQSAEiledASILFLKHLPSYRSLSVSQAHNVDPQKVRGAGISFKELSQQDYIQRISSIADAPELAEMLVQVFQKSEYVVFPLSIENQLAGAFVFLEEIHDMQSLRLLESF----------
|
||||
>seq100
|
||||
--QLEFKKYFQCKTKDDVLALFLLELQQkflarnkkLNAVYFKYLPSVTSFVALQALGIEIDSLKGVGCRLTDIESADPQAFFSQGGVPTELKELMSDGL------------------------------------------------
|
||||
>seq101
|
||||
---------------EAVIQTLVNSTSDliqkKPALYFKYFSAHATLVASHAAQIPGDHIKNLGLKLSTAEPKQLHAWLTQepGKIP-QLTELMKEVFTCKDYVAVPVLYKNDPQGTLVL--------------------------
|
||||
>seq102
|
||||
---------KKAKSRDELFRlyfKWLSQFSNLNLLYFKHIPSAHALMMQYCIGLDQQKLDGLGCELNPKDQL---------KDPQSLRELIRQALGQASVQVIPLVLQNEIQGYFVYWGQQQNISADIFELYFE--NLVY---
|
||||
>seq103
|
||||
---------RISETKdlDETIQIFMEELSResrqFPVLYFKYLPTHASLAISQASHLAVEKFRGIGLDLKRFDPKQAAEFFKAPSRDSELNDLIKQAFRANEFAAFTHFNDSETLGLFVVFNRPAT-AAQNI--------------
|
||||
>seq104
|
||||
------------------------QLAGlRPVIFLKYLPAHQSLTITASAQIPFNEIRTVGVKLNHYDEKNISFVLGEPEKFKELKELMTELFKCPNFTAVPLRVEGKVVGIFAIFQAMKD--FDElVNKIVYLGALSY---
|
||||
>seq105
|
||||
----------------------------KPVLFLRFLPAYSSLIAGAASKLPLDQIKGLGINLSSFDPKQISELLMHPEKIAPLSEFMREVFKVADFSAFPFVHENHPIGIAL---------------------------
|
||||
>seq106
|
||||
----------------------------------------------HSQNMNSIEVEGFTFQIELRQVKDYMSQFSLGVVPPQMIQSIEEKLKIKLNKALPLFVEDQLEGVFVTPTAI----PEDFAEEFSLFSLTYQL-
|
||||
>seq107
|
||||
-------------SSDEIHSILLKEISNefrsSGVLYFRYLPTHKSMVLTSWQNVDESNTKSIGLSFKKLTQEEIQNVLLSPHLCQDLTRLMAIVFKVKEFETAPMLgIDGAINGIVILTQKFESDRQSElLNSMVSVGSLVLQ--
|
||||
>seq108
|
||||
------------KDVSDTIQTFLEHTSQltegTKVLFLRYLPAYYSLLLTHAAQYPMEEGKKIGINLKDIDPKSVMDHLRTPSEMPLLQNLLKQVFSVEQYLAVPVETDDEFISIVIVCRDMSDPALRRVFDSFmQIFKVSY---
|
||||
>seq109
|
||||
---------------------------------------------------------GVGSQLTESETQSVAETLKKGTTPQSLRNLMAEAFAVTDFILKPLFVHKNLDGLFLFWSQTGV-AADEFTNEFLIFSLLYQN-
|
||||
>seq110
|
||||
------TQLSKAKNIDDVINIYLSHVASlvdqKPIIFLTHVSAYLSLVVTHSIQLNKDNLKNVGVSLKDLDSKTYVEQLMNPMSMAGIQGLMKDFFKLDKYLAVPIEDETNIGGIVLVFDAIEDVSTRRLFDSFT---------
|
||||
>seq111
|
||||
----------KAKGVGEVVQIFTEQTSEmlggVPVAYLTYFPSMIAYGVTNCALIDKEKIKSLGINLKDKPKEEYDKILQDPYSSSFLNQFVSQYFKTTAYLAIPLVDQGGLKGLFLVLKDITDAQQRRLLDSF----------
|
||||
>seq112
|
||||
------------------------------------------FVAMAAHGLDIESIKGVGSRMTKEEAKDVIAFLASGKVPQSLDELMSQGLKVPQYFSQAVPIHHGLDGLMIFWGDESF-HFKKIENDFYVFLMLFQQ-
|
||||
>seq113
|
||||
----------------------------------------------HSSGYLQESIQGVGCQLPENENQDLVSQLSLGVCPGSLAVLMKRALGLAQVQVFPVFSQNDLEGVIVADVVPDSEFHRHLSEQFALFGLAYSHF
|
||||
>seq114
|
||||
------TQMSKAKEVDQVIQVYLDQVSElvgkKPVIFLSHISAYLSLAVTHSAQVNKETLKNIGISLKDLDTKVYIETLMNPMSFSDLQSLMTDFFKTSDYFAIPIEEDNTVAGIVVVFDALKDVPTRRLFDSFT---------
|
||||
>seq115
|
||||
---------KLAQTKEldEAVRLFINAFSRlssdTPALYFKYLPNHLSLVFSQASLLPENKFRGIGIDLKKVSDLGPDRFFENPASAKILREFVGEVFKKDRFTVFTHRVDQEVVGLFLALEQFDLKARSDLSLLIEAFDLAYK--
|
||||
>seq116
|
||||
--QAQIEKIQAATILSRTVETYLQAAHSrtgKPVCFFKYLPARRSLFLTNASGLNLSDYRGLGIDFVKENPEFQASHLQTPHNLQALQELIRTAFKVEGFYALPVDDGNRVRAIVVFLQ------------------------
|
||||
>seq117
|
||||
-VQQEFKKYLECQNKDEVLALFLLELNQkflarnkkVCAVYFKYLPSVSSFVALQTLGIEIDSLKGVGCRLTAKENADPISFFAGGGIPQ----------------------------------------------------------
|
||||
>seq118
|
||||
-------------------------IKKQPVIYLKYFSAHATLVAADVANLPPEYIKNLGINLSDMDPKELQRVLmEEPAKIQQLKELMKEVFKIDKYVAFPFT-------------------------------------
|
||||
>seq119
|
||||
--------------IEVFLTHIEPQLSDeSGALYFKFNSPRRTLVASYGLGIDPTLIEGIGLNLEKVDPEFKRSDIKQIHERDSFNGLLHEVFGVSEYSIRYLTLNEEVIGVFVHWGLLEKTPRHDfVDRSFTLLSK-----
|
||||
>seq120
|
||||
---------------DTVIQTYLDHVSQlvnKPAIYLSHISAYLSLAVTHSSQINKDSLKNIGVSLKDLDSKVYIEKLMYPMNLEGLQALMKDFFQTAEYFALPVEEEAGIGGVVVVFDPLKDVVTRRLFDSFT---------
|
||||
>seq121
|
||||
--------------AQDVVQVFVDFASHalgdLPVVFFKHLPAYFTLSITHSAVVPMAQLRGIGLNLKSEISNDYGSLLKNPAELPSFRQLLFEVFGARDFEAYPIETEEGPSGVVVALQKLEDAATKR---------------
|
||||
>seq122
|
||||
---------------DEVVHVFIHTFSRltsdTPVLYFKFLPSHLSLVFAQASLLPEEKFRGVGIDLRKSHPGRPEEFFENPEEAKALREFVAQVFKKNRFTALTHRVDREAVGLWVALEKFDMK-------------------
|
||||
>seq123
|
||||
--------------LSCAAETYLQAVHSrieHPVCFFKYLPARQSLFLAHASGLDLNAYRGLGIDFSKGKAEFQASQLRDPSSIQEFSELIQTAFKVEKFCALPIDDGDGVKAIVVFLQSN----------------------
|
||||
>seq124
|
||||
---------------------------------------IQSFVVLQSIGLDAEAVQGLGGKLSPEELLNIKVLFQSQQIPGQIDLLLRSGLKVEKYLGQALFIDGQLDGFFVFWSDSSEIQESFYAAEFLVFKMVYQN-
|
||||
>seq125
|
||||
----------------------TRSLNDTPVLYFKYFPGHMTLLFSRATLLSNDTFRGLGIDLKKEGSRTLEEHFEHPATLPGMRKMVREVFKCELFSALTHMDGNEVRGVFIILDEVEV--------------------
|
||||
>seq126
|
||||
-----------------FLANLHQNLSTSPVLYFRFVNSPLSLVITHASGIQVDKVRGVGFHL--QDTQSIREKLLQPEALTGFYDFLKQAFRLQE--------------------------------------------
|
||||
>seq127
|
||||
-----------AATKdlDETLDIFLEALAAetkAPTLYLKYLPTHASLLIAQVAQLPIEKYRGLGLDFRKEGIQNASELLRDPSEVKPLAELIRSLFQKESFVAIPHLNEGEALGVFVVLGDLDVSNSA----------------
|
||||
>seq128
|
||||
-------------TLDDAVQVFMNSASNalggCSAVFFKYIANRRVLIAGRGEKLQDHDLRGLGMDL-NRSVDGFRaVQLREPMHLMAFVQMVKDVFGIKDFFAWPVRALGEIQGLICFLKN-----------------------
|
||||
>seq129
|
||||
----------AAHSIDQAVEVYLKAMHQslaQPVCLFKYLPARRSLFLTQVMGLPPEPYRGLGVDFSKQEENFSSQVLRQPEGLSSLRDLVNSAFKTSTFAALPVDDGEGIKSVVVVLNRS----------------------
|
||||
>seq130
|
||||
-------------------EAYLQAVHSrtgKPVCFFKYLPARQSLFLVHASGLDLNAYRGLGIDFSKEGSEFQGSHLLDPQSIQKFRELIRKVFKVERFCSFPVDDEDGVKAVVAFL-------------------------
|
||||
>seq131
|
||||
----------------------------------------------------------------PEEMEDLTSQLQAGHLPSALQELMTQGLKIAKFNFKPVLVQRALDGLMIFWGEASA-HQALIENNFVIFQMLYQQ-
|
||||
>seq132
|
||||
---------------DHIIQAMVDEISHtsqnAPALFLRFLPAYQSLTVTASAQIPLESIKTVGIKLDRYDPRNIASVLKDPLKFLELKELMSQVFKCPEFYALPFVIGGNPIGLMISFGTSE---------------------
|
||||
>seq133
|
||||
------KELEMCQNKEDVVNLFLREAVRylgtKEGLYFRYLDLHRTLALAQTFGIERNDIDGVGIDLAALEPGFLPSQLSKPHLLFSLQEFVKKGLLRSGAVILPLNFRHQTIGVFIF--------------------------
|
||||
>seq134
|
||||
----------------------------APAVFLKYLPNRRALVTSAAHRLPAEAWKGLGLNL-SEEPDFRIGDLRHPEKLAGLGEMVQSLARAKEYWVRPMIIRDQVHGLFLVMGPSGDLPVQQMESTISVM-------
|
||||
>seq135
|
||||
-------------------EEISRASQKAPALFLRFMPAYQSLAVTASAQIPLESIRTVGIKLDRYDPRNVSSVLRDPLKFPELQELMGQVFKCSEFYALPFIVGGNPIGLVISFGKSEN-LLKIFSSLVIISELAYN--
|
||||
>seq136
|
||||
--------------------------GQAPAVFLRYLPNRRCLVTTAAHRIPAEAWKGLGLNLSEEPDFRI-GDLRHPEKLPGLREMAQSLGQTNELWVRPLVIREEVHGLFVVLA------------------------
|
||||
>seq137
|
||||
---------------SDAVALFIDEANVlieNPIVFFRHAPTYSSIVYSQSAGLERGDYKNVGLNFRELNDRVYPNNLKNPDNIEPLRELLAKIFQVTKYETHPLIIDGDVAGFFVCFTEINEPPMLRlFHAAFEILQMK----
|
||||
>seq138
|
||||
----QVEQIQASMTLPNAVEAYLQAVHVrtgKPVCFFKYLPNRRSLFLAYASGLDLSTYRGLGIDFNKEKFEFQKGLLRDPQNIQVFKDMIQTAFKVERFCTFPVEDANGVISIIVFLKA-----------------------
|
||||
>seq139
|
||||
-------------------------VNSAQAVYFKYNAVRKAIVASYGLGVDPALIEGIGLNLERVDPSFQRSEIRVLHERESFQNLLRDVFKVKTFHCHYLSVGGEVIGVFVHWGLSENSE------------------
|
||||
>seq140
|
||||
----------HAKDVHEIIQRFMNSANTafsqAPVIFFRHLQTQDQFVLSQLVGINEPTLKGVQLGLSSEQI---------EQDPDALKDSIMALFEKDGFEFRFLKEDQQIIGLFVILKSFNDPYERKcLSHLFDIFDLSYQQ-
|
||||
>seq141
|
||||
---------LAASSIDRAVEVFLKAMHQtlsQPICLFKYLPARRSLFLTQVVGLPAEPYRGLGVDFSKQDENFSSQSLRQPETLPRLRELMQSAFKAST--------------------------------------------
|
||||
>seq142
|
||||
---------KMAQAKDtvEVIDFFLNQVSEltgKPCIYLKHFSSFSSLVVSNSSILEIEKLKKVGLSFKEEDPKTYHAQVLDPSKMTKLNALMAQAFKVEK--------------------------------------------
|
||||
>seq143
|
||||
-------------------------LASCGMVYFKYIANRRVLMATQAHKLDIEW-TGLGVNFNETGDTFRTQHLREPGQIVEVQNMMREAFHAGEFFAFPVETLGEIQGVVAFLRPEPDAATMKmIQD------------
|
||||
>seq144
|
||||
-FSEEFMRYQTAKTLDSLLAIFFERVQKisrknYSGLFLKAIPSIDSLAVFGGTAFDWQKWKGYGVKVQ-----SFQQNPRRPLVISEVVEFMREVVGVEQFVSFPFAFENHLEGVFLFWCHQDKPQSRDLE-------------
|
||||
>seq145
|
||||
--------------------------------FFKYIANRRVLMAGQAHKLDDFDLNGLGINFNEANSDFRSSQLRDPHGLNELQAIMAEVFNASEFVALPVEALGEIQGIVILLRNDPDPAGQQkLQEWVFLLS------
|
||||
>seq146
|
||||
---------------DGVLQTLLGSLSQlidgKPALYFKFLPAYNSLVVGLSEKIPIDGLRTVGISLEKYNLKEVPTLLTTPEKIPQLNDLMGEVFK-----------------------------------------------
|
||||
>seq147
|
||||
----------KAQSLDELGINLVWSLKNivssrRKGIYFKYLPTYCSLVALGGFNFEDKNISGVGLNFASSKDFNAAQHLHKLMYVPAFLKVVERLFAHTNVTVTTLSCENEVRGVLVCEKPPSR--------------------
|
||||
>seq148
|
||||
---------------DDAIQLFLREAIRylqtKEALYFRYREVHHTLVLTKSVGIDLADVDGVGIDLAEHEPGFVPALLQKPHMLFSLQEFVKKGLLRQHAVILPHIFRGQVLGVFVLPCDKGLARVHD---------------
|
||||
>seq149
|
||||
----------RSTTRDDSIDLFLREATRylqiKEGLFFRYREIYQTLVLTHANGLDPKKFGDVGIDLAAHEPGFVPALLKKPHLLYSVQEFVKKGLSRQSAVVLPFFYRDQIYGVFVLP-------------------------
|
||||
>seq150
|
||||
-------KISVITNINDVVDCFVQSLFElinRPAIYLKYVPSHTSLIVTHVAGLDINKFKNAGILF-KEEPQSYLEKIKHPQKFQQLQEFMLHIFQVQSYFPIP---------------------------------------
|
||||
>seq151
|
||||
---------------DETIQIFLETLSRewndVPVLYFKYLPSHASLPLALGAGQKIEKFRGFGVDLRKESPDQIVEFFRAPESSEILKRFMKEVFASDNFTPFTHTTEGEALGLFVALTKTP---------------------
|
||||
>seq152
|
||||
-----------AEAVQLVVNELHRHMPFSQVVFLKHIRGRSTLVAESSSGIEMQALRSVGVDLKQTEPSFKEALLLRPEKLVGVTDLVRSGFDNRHFAAFPVVVQKEVWGI-----------------------------
|
||||
>seq153
|
||||
----------------------------RKGIYFKYLPTYCSLVALGGFNFDEHSVSGVGLNFSSSKDFNAAQHLHKLIYVPAFLKIVERLFEHTNVNVKTLVCDGEVRGVLVYDKSPSM------S-------------
|
||||
>seq154
|
||||
----------------------------------------------SGHHVSASSLKGVGFQL-EGTIENLIAQVQNGEIPKQLQTLMREGFQAGQYLCLPVWVSGDLDALLVVWSPSEVLRAEIFANHEALFSILYE--
|
||||
>seq155
|
||||
-------------SLTDIVTHLCSEIHKensCDVVYFKYIDSQGTLVAAHSEGLAFETIRGIGIDFISSGKKFFRDQLHLPASLMEMKELVQQVFNEKEF-------------------------------------------
|
||||
>seq156
|
||||
----------------------------------------------------------VGAKLTVDEYRALENLLRARTLPESLRKVIQEAFRIEKFTTFPLFLKNQVEGALVFWGLDLN---EGDWGEFLIFQLCYQN-
|
||||
>seq157
|
||||
----------NCDSESEAVKHCLGEIARslgrGQIVFFRFIRGRATLVAEAANGISAEAISNIGVELKKTEPKFNEKLLQRPERLLGLLDLVRNGFMQRQFAAFTVEIDRVPEGVILIL-------------------------
|
||||
>seq158
|
||||
--------------------------------------------------LDMEELKGVGCRLVESEANDPVSFFAQGGIPAELVGLAREGLNANEPIFRPIFVLGELDGFMIFWSRQNEIYPEELDNDLSLFQLMYER-
|
||||
>seq159
|
||||
---------------QNAIELYMKEVSRylkgAAVIYFKYIPGYESLVVTQSVGHDLNELSGVGLNLLEEEKNFDQEKLKFPQQLNSIRRLMKE--------------------------------------------------
|
||||
>seq160
|
||||
-------------------------------LFFKHLPAYFTLSVTHSAVAPMAQLRGVGLNLKDELNADYLQLLKSPSEMAGLKTLLFELFGAREFIGYPIETDEGISGLVVALHGLEDPASRRLFEAFTrLFEMQY---
|
||||
>seq161
|
||||
---------------------------KRKGVFFKYLPTYCSLVAMGSFFFDSpKKLNGVGLNFSKSVKFKPSEHLQHGLKVPAFKKLCEKIFGHKNLNIRVLSVDHEVQGILVYEKPPANSL------------------
|
||||
>seq162
|
||||
------------SSVDECVQVFLASAAQalgsCPAVFFRYIANRRVLLAAYGEQMEAVDLSGLGLDLNETAPGFRTVQLREPMRIIPFVEMVKVVFAVPEFFAWPIHALNEIQGLACF--------------------------
|
||||
>seq163
|
||||
---------------EDIVNAFLTYLSElvdgKICLFLKFYPAKSALVVRNIKGHDLEKIyteqdisdfKNIGMSLGPASEKDIVSIVARIARHPSLKTLVTKLFNTSKYMAYPLIIRDTPIGVTIVVDEMTLSERDDkiLKQYLNQLEISY---
|
||||
>seq164
|
||||
----------------------------RKGLYFKYLPTYCSLVALDGFNFSNKKFNGVGLNFSSSKDFDANQHLNKLKQVPGFIKVIERVFGHQNINLKTLECDGEAKGVLVYEQAPR---------------------
|
||||
>seq165
|
||||
---------------SDTIQVFLEHTSQlaenSKVLFLRYLPSYYSLLLSHAASYQMEEGKKIGLNLKEIDPKKIMDILRLPQEMDLLKNLLTG--------------------------------------------------
|
||||
>seq166
|
||||
---RLMQAFSKAKDIDAVIQIYLEHTSQiignKPIVFFTHLSSYLSLLVSHVVGYEKEALRNVGVNLKSVESKEYMNLL-----------------------------------------------------------------
|
||||
>seq167
|
||||
-------------------VMMSEQTTKAPAVFLRHLPNRRCLVTRAAHRLPAEAWKSLGLHL-NEEPDFCLSDLRHPEKLSGLKEMGQTLVGHDEIWVRPLILRDEVYGLFVVFSALIDLPMNRLESIVK---------
|
||||
>seq168
|
||||
---------------------------KilpekRKGVFFKYLPTYCSLVAMGSFFFENpKKINGLGLNFSKSVKFKPRQHLQYGLKVPAFTKLCEKLFGHKKLNIRVLSADQDVKGILVYEKPPANSL------------------
|
||||
>seq169
|
||||
---------KKAKSLDELGINLVSSLNKivlpgRKGVYFKYLPTYCSLVALGGFNFESKKVTGVGLNFSTSKDFDASKHLQQLMYVPAFLKVVERLFEHTDVTVRTFDCDRESKGVVVYED------------------------
|
||||
>seq170
|
||||
---------------------------TqgRKGIYFKYLPTYCSLVALGGFNFKNTKVNGVGLNFSSSKDFNASQHLQKLLHVPAFIKVVERLFEHTEVVVRTFECDGETKGVAVYEKPPGG------SSDIEVLSLC----
|
||||
>seq171
|
||||
---------------DQCVQLFMESVSRvfsdVPILYFRYVASHMSLLVSQAVWLPIEKIRGIGVDLKNEDPARLPECFRDPSRLEPLKTLVQQVFR-----------------------------------------------
|
||||
>seq172
|
||||
-------------------------------IFFKYLPAHLSLVTSHASKIPLEQIKNLGINLSQLSQVDATKISEMLLQPSSlpgLPDLMREVFQLQAYEAIPFVHQN----------------------------------
|
||||
>seq173
|
||||
--------------------------------------------------INKETLKNIGVSIKDLDQKDYVEKLANPMELVGLKTLMKDFFQTTEYFAVPVEEDSAIAGIIVVFDPMKDVSVRRLFDSF----------
|
||||
>seq174
|
||||
--------------ADDCIQIFLQSCSNmlgsCGVIYMKYIANRRVLMTTLAHRIDAEW-NGIGVNFNETTGDSFR--TAHLREPNNipeVKQMIHEVFHTEEFFAHP---------------------------------------
|
||||
>seq175
|
||||
---EAILNLKKAQSLDELgvnLSWSLNHIVIdeRKGMYFKYLPTYCSLVSIGGFNLKDKKTNGVGLNF--SSSKDFNSLMHLREVLNVpaFENIVKKFFEHTDVQTRLFECDGGVKSLLVYERAPGGS-------------------
|
||||
>seq176
|
||||
-----------RNSVNECVQDFLDFGSKllgdCGAIYLKCLPLRK--VLSATHGVALENWKGVGVNLADESHFTWGALQEPQNVP-AIREMVREIFNRSDFQAFTFKVAKEVNGIALFFS------------------------
|
||||
>seq177
|
||||
-------------SIDDAMNVFLKNVSSvlgsPPVLYMKYIANRRVLMASQSQNLESFDLNGLGVNFNELNANFRASQLHDPQAIPEVGSLVKEVF------------------------------------------------
|
||||
>seq178
|
||||
------------NSADECLRVFLESASHslggCAAVYFRYIPNRRVLLAGHAINTHGIELKGLGINFNEVAPGFRTAQLRDPMGIPEFSEMIREIFGVSEFMAWPVE-------------------------------------
|
||||
>seq179
|
||||
--------LKKCETKKDLIKCFLVEFSRyykgAPVVFYKYVRSYKSLVSSMHLGVKSFKS-GEIIKLTAKEDGQLNNDKLDSIYLESLENI---AFDDEeGVYFLPLSILGELKGLFVFSRQEA---------------------
|
||||
>seq180
|
||||
--------------------------SKRKGIFLKYLPTYCSLVATSSFFFDSpRKLNGLGLNFSKAIKFNPKEHLQQGLKIPALAKLCYKTFGHRMLNSRVLTSDKDIQGILVYEKP-----------------------
|
||||
>seq181
|
||||
---------------DEIIETFLSYTSEllggATCVFLKYYPQKTALVARHVQcrsehcpytKEQLEGIKNVGMSLGVAGEKDIVSVISKITNHPSLKTLVYKLFNTSKYIAFPLIIRDTPLGVTLIVdvNSLGSKEDKIVQQ------------
|
||||
>seq182
|
||||
---------------DEIIETFLSYTSSllggSTCVFLKYFPQKTALVVRHIQckgqcsfsKEQMEALKNVGMSLGVAGEKDIVSIVSKISNHPSLKTLVNKLFNTTKYLAYPLIIRDTPLGVTlIVDQNsLDPKEEKIVQQYLN---------
|
||||
>seq183
|
||||
-----------CTSFTDSIDCFMSEISRylknTSVMYLKYVPAYRSLVTSRSVHLPDF-TSGESFDL-KTLFKENNLNETNFEKSNEFRDKIYDYTYWDNFSIIALRINGDVKGLFIIKTDFVNE-------------------
|
||||
>seq184
|
||||
------------------------------------------LVLTQSTGVDSNEFDGVGIDLAEHEPGFVPALLQRPHLLFSLQEFVKKGLGRQNAVILPLIFRGHVLGIFVLPEDMDKAMRH----------------
|
||||
>seq185
|
||||
-------------------------FNCQSLVYFRYLKSYSSLLVTHSEGLKFSDLRGKGISFSTNQNFVPERDLKRIDSNPLFYELVRKLIPNQAYTSFLFEACGEPKGVFVLANA-----------------------
|
||||
>seq186
|
||||
-----------CHSVEDAIQDWLNKINKlyqdTPTVFFRYIPNHSHLVLSQCAGLDLQKVRGIGVPLAGLSLKEQKYFYSHVRFLANLRDLVKGAFNVSEFEFRELVTDKAVLGLCVIFKNLKNESEKRfFNDSIELLNLVA---
|
||||
>seq187
|
||||
------------------------------WIawYFKFMPEVQAFVVTQYRRFNDKQTFPFsSFKPQTVSVTDLFALLHAGDRNVELFSFVAKHFDITNLQVFPISYSNLVDGIFCFIGQSEEK-------------------
|
||||
>seq188
|
||||
----------------------VRYYKTDLALYFRYNPGAGTLVVMRASGLPLEHFQAVGIHLRKNE-PGFTEEIL--HQPNRiriLREFVIAGMNRLDFVAFPHVENKIVRGLFVVPAKKNQ--------------------
|
||||
>seq189
|
||||
-------------------------------------------LVSHTACLPIDKFRGIGVQLHSQSAIDLAGQLNEPMRIDGLRQLVAEVFRRDNFFAFTHSSEGEILGVCIV--------------------------
|
||||
>seq190
|
||||
--------------VHETIQVFLDHVSQlfddSRVVFLRYLPAYYSLMVSHTAKIavqqpptaNPEEARKVGINLKDIDPKTVLDQLKNPQTFVPLSELMSE--------------------------------------------------
|
||||
>seq191
|
||||
------------TTRPQMIEKWMREALRvhctNEILYMSYLGTKKTLIVTQSLGFAADELDDVGLDLAKEEPGFEESMLQRPEKLWALHQFVTKGLQRREALYYSLMHHGQILGVFILPAKEGE--------------------
|
||||
>seq192
|
||||
-----------------IIDTFLSYMSElingKTCIFLKYYPAKTALIIKHMAGKNigtlysAEQIEGfknVGMSLGVAGEKDIVSIVSRIANHPSLKTLVSKLFNTTKYLAYPLIIRDTPIGVtlLVDIDSVNEQDEKIIKQYLNQFEISYD--
|
||||
>seq193
|
||||
-----------------------------------------------------------------EESKDPEKLFSEGKAPESILKLTKEGLGAEEPVMKFVSIRGSLDGIMAVWGQGSPVFWDQFDHEFSLFNLLYE--
|
Двоичный файл не отображается.
|
@ -0,0 +1,44 @@
|
|||
import os
|
||||
import numpy as np
|
||||
import string
|
||||
from typing import Iterable
|
||||
|
||||
def parse_a3m(filename):
    """Read the sequence lines of an A3M alignment file.

    Lines that start with ``>`` are treated as labels and skipped. Every
    other line has its trailing whitespace removed and all ASCII lowercase
    letters deleted, and is appended to the result in file order.

    Args:
        filename: Path of the A3M file to read.

    Returns:
        list[str]: One cleaned string per non-label line.
    """
    # Translation table mapping each ASCII lowercase letter to None (= delete).
    table = str.maketrans(dict.fromkeys(string.ascii_lowercase))

    # The context manager closes the handle deterministically; iterating a
    # bare open() call left the file open until garbage collection.
    with open(filename, "r") as handle:
        return [
            line.rstrip().translate(table)
            for line in handle
            if not line.startswith('>')  # skip labels
        ]
|
||||
|
||||
def logits2value(logits, bins):
    """Map per-position class scores to the value of the highest-scoring bin.

    The class with the largest score along axis 2 of ``logits`` is selected
    for every position and replaced by the matching entry of ``bins``.

    Args:
        logits: Array-like whose axis 2 enumerates the classes.
        bins: Sequence holding the value assigned to each class index.
            ``None`` entries come out as ``nan``.

    Returns:
        numpy.ndarray: Float array shaped like ``logits`` with axis 2 removed.
    """
    preds = np.argmax(logits, axis=2)
    # Converting once to a float array turns None into nan, exactly as the
    # element-wise assignment into a float array did before.
    bin_values = np.array(bins, dtype=float)
    # Fancy indexing looks up every position at once; the former nested loop
    # reused len(preds) for both axes and so only handled square inputs.
    return bin_values[preds]
|
||||
|
||||
def loadT1001(preprocess=True):
    """Load the T1001 example arrays and sequence from the working directory.

    Reads the ``dist``, ``omega``, ``theta`` and ``phi`` arrays from
    ``T1001.npz`` and the first sequence returned by ``parse_a3m`` for
    ``T1001.a3m``. Both paths are relative to the current directory.

    Args:
        preprocess: When false, the arrays are returned exactly as stored.
            When true (default), each array is passed through
            ``logits2value`` to replace class scores by bin values.

    Returns:
        tuple: ``(dist, omega, theta, phi, seq)``.
    """
    # The context manager closes the .npz archive once the arrays are read.
    with np.load('T1001.npz') as sample:
        sample_dist = sample['dist']
        sample_omega = sample['omega']
        sample_theta = sample['theta']
        sample_phi = sample['phi']
    seq = parse_a3m('T1001.a3m')[0]

    if not preprocess:
        return sample_dist, sample_omega, sample_theta, sample_phi, seq

    # Index 0 of every bin list is None, which logits2value turns into nan.
    # NOTE(review): the bin grids below are hard-coded; confirm that their
    # lengths match the number of classes stored in each array.
    dist = logits2value(sample_dist, [None] + list(np.linspace(2, 20, 37)))
    omega = logits2value(sample_omega, [None] + list(np.linspace(-180, 180, 24)))
    theta = logits2value(sample_theta, [None] + list(np.linspace(-180, 180, 24)))
    # Fixed: phi was previously decoded from sample_theta by mistake.
    phi = logits2value(sample_phi, [None] + list(np.linspace(0, 180, 24)))

    return dist, omega, theta, phi, seq
|
|
@ -0,0 +1,46 @@
|
|||
from T1001_loader import *
|
||||
|
||||
import json, time, os, sys, glob
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
sys.path.insert(0, '../..')
|
||||
from sequence_models.graphmodel_utils import *
|
||||
from sequence_models.utils import Tokenizer
|
||||
|
||||
# load features
dist, omega, theta, phi, seq = loadT1001()
dist = torch.from_numpy(dist)
omega = torch.from_numpy(omega)
theta = torch.from_numpy(theta)
phi = torch.from_numpy(phi)

# Sizes used throughout; the residue count comes from the loaded sequence
# instead of being hard-coded.
n_res = len(seq)
n_neighbors = 10

# process features
V = get_node_features(omega, theta, phi)
E_idx = get_k_neighbors(dist, n_neighbors)
E = get_edge_features(dist, omega, theta, phi, E_idx)
mask = get_mask(E)
E = replace_nan(E)
# NOTE(review): `tokenizer` is not assigned anywhere in this script; unless a
# star import above provides it, it presumably has to be built from the
# imported `Tokenizer` class first -- confirm.
S = get_S_enc(seq, tokenizer)

# reshape: prepend a batch dimension of size 1
V = V.view(1, n_res, 10).float()
E = E.view(1, n_res, n_neighbors, 6).float()
E_idx = E_idx.view(1, n_res, n_neighbors)
mask = mask.view(1, n_res)
S = S.view(1, n_res).long()
L = [n_res]

# NOTE(review): k_neighbors=30 differs from the 10 neighbours used to build
# E_idx above -- confirm which value the decoder actually expects.
decoder = Struct2Seq_decoder(num_letters=20,
                             node_features=10,
                             edge_features=6,
                             hidden_dim=128,
                             k_neighbors=30,
                             protein_features='full',
                             dropout=0.10,
                             use_mpnn=False)

# Forward pass only: evaluation mode, no gradient tracking.
with torch.no_grad():
    decoder.eval()
    output = decoder(V, E, E_idx, S, L, mask)
|
Загрузка…
Ссылка в новой задаче