Mirror of https://github.com/microsoft/qlib.git

Added KRNN and Sandwich models and their example results based on Alpha360 (#1414)

* Update README.md updated the result of KRNN and Sandwich models based on Alpha360
* Update README.md
* Update README.md
* Add files via upload
* Update README.md
* Update README.md
* Update README.md
* Add files via upload
* Delete pytorch_krnn.py
* Delete pytorch_sandwich.py
* Add files via upload
* Update pytorch_sandwich.py
* Update pytorch_krnn.py
* Update pytorch_sandwich.py
* Update pytorch_krnn.py
* Update README.md
* Update README.md
* Update requirements.txt
* Update requirements.txt
* Update README.md
* Update README.md
* Update pytorch_sandwich.py
* Update link on index

---------

Co-authored-by: Young <afe.young@gmail.com>

Parent: 19a0eb78bc
Commit: efffb2819a
README.md
@@ -11,6 +11,7 @@
Recent released features
| Feature | Status |
| -- | ------ |
| KRNN and Sandwich models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1414/) on May 26, 2023 |
| Release Qlib v0.9.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.9.0) on Dec 9, 2022 |
| RL Learning Framework | :hammer: :chart_with_upwards_trend: Released on Nov 10, 2022. [#1332](https://github.com/microsoft/qlib/pull/1332), [#1322](https://github.com/microsoft/qlib/pull/1322), [#1316](https://github.com/microsoft/qlib/pull/1316), [#1299](https://github.com/microsoft/qlib/pull/1299), [#1263](https://github.com/microsoft/qlib/pull/1263), [#1244](https://github.com/microsoft/qlib/pull/1244), [#1169](https://github.com/microsoft/qlib/pull/1169), [#1125](https://github.com/microsoft/qlib/pull/1125), [#1076](https://github.com/microsoft/qlib/pull/1076) |
| HIST and IGMTF models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1040) on Apr 10, 2022 |
@@ -353,6 +354,8 @@ Here is a list of models built on `Qlib`.
- [ADD based on pytorch (Hongshun Tang, et al. 2020)](examples/benchmarks/ADD/)
- [IGMTF based on pytorch (Wentao Xu, et al. 2021)](examples/benchmarks/IGMTF/)
- [HIST based on pytorch (Wentao Xu, et al. 2021)](examples/benchmarks/HIST/)
- [KRNN based on pytorch](examples/benchmarks/KRNN/)
- [Sandwich based on pytorch](examples/benchmarks/Sandwich/)

Your PR of new Quant models is highly welcomed.
examples/benchmarks/KRNN/README.md
@@ -0,0 +1,8 @@
# KRNN
* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py](https://github.com/microsoft/FOST/blob/main/fostool/model/krnn.py)

# Introduction to the settings/configs
* Torch_geometric is used in the original FOST model, but we do not use it here.
* Make sure your CUDA version matches the torch version so that the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1.
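As a quick sanity check for the CUDA note above (illustrative only, not part of the benchmark files), the installed pairing can be inspected like this:

# Verify that the installed torch build and CUDA toolkit match,
# so the GPU code path of the model can actually be used.
import torch

print(torch.__version__)          # e.g. 1.12.1
print(torch.version.cuda)         # CUDA version of this torch build, e.g. 10.2
print(torch.cuda.is_available())  # True when a usable GPU and driver are present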
examples/benchmarks/KRNN/requirements.txt
@@ -0,0 +1,2 @@
numpy==1.23.4
pandas==1.5.2
examples/benchmarks/KRNN/workflow_config_krnn_Alpha360.yaml
@@ -0,0 +1,91 @@
qlib_init:
    provider_uri: "~/.qlib/qlib_data/cn_data"
    region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
    start_time: 2008-01-01
    end_time: 2020-08-01
    fit_start_time: 2008-01-01
    fit_end_time: 2014-12-31
    instruments: *market
    infer_processors:
        - class: RobustZScoreNorm
          kwargs:
              fields_group: feature
              clip_outlier: true
        - class: Fillna
          kwargs:
              fields_group: feature
    learn_processors:
        - class: DropnaLabel
        - class: CSRankNorm
          kwargs:
              fields_group: label
    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
    strategy:
        class: TopkDropoutStrategy
        module_path: qlib.contrib.strategy
        kwargs:
            signal:
                - <MODEL>
                - <DATASET>
            topk: 50
            n_drop: 5
    backtest:
        start_time: 2017-01-01
        end_time: 2020-08-01
        account: 100000000
        benchmark: *benchmark
        exchange_kwargs:
            limit_threshold: 0.095
            deal_price: close
            open_cost: 0.0005
            close_cost: 0.0015
            min_cost: 5
task:
    model:
        class: KRNN
        module_path: qlib.contrib.model.pytorch_krnn
        kwargs:
            fea_dim: 6
            cnn_dim: 8
            cnn_kernel_size: 3
            rnn_dim: 8
            rnn_dups: 2
            rnn_layers: 2
            n_epochs: 200
            lr: 0.001
            early_stop: 20
            batch_size: 2000
            metric: loss
            GPU: 0
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
                train: [2008-01-01, 2014-12-31]
                valid: [2015-01-01, 2016-12-31]
                test: [2017-01-01, 2020-08-01]
    record:
        - class: SignalRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              model: <MODEL>
              dataset: <DATASET>
        - class: SigAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              ana_long_short: False
              ann_scaler: 252
        - class: PortAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              config: *port_analysis_config
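The config above is normally launched with `qrun workflow_config_krnn_Alpha360.yaml`. For reference, a minimal Python sketch that drives the same task through qlib's API (it assumes the cn_data bundle has already been downloaded to ~/.qlib/qlib_data/cn_data):

# Sketch: run the KRNN task from the YAML above via qlib's Python API.
import yaml

import qlib
from qlib.constant import REG_CN
from qlib.utils import init_instance_by_config

with open("workflow_config_krnn_Alpha360.yaml") as f:
    config = yaml.safe_load(f)

qlib.init(provider_uri=config["qlib_init"]["provider_uri"], region=REG_CN)

model = init_instance_by_config(config["task"]["model"])
dataset = init_instance_by_config(config["task"]["dataset"])
model.fit(dataset)
pred = model.predict(dataset)  # pd.Series of scores indexed by (datetime, instrument)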
examples/benchmarks/README.md
@@ -68,6 +68,8 @@ The numbers shown below demonstrate the performance of the entire `workflow` of each model.
| TRA(Hengxu Lin, et al.) | Alpha360 | 0.0485±0.00 | 0.3787±0.03 | 0.0587±0.00 | 0.4756±0.03 | 0.0920±0.03 | 1.2789±0.42 | -0.0834±0.02 |
| IGMTF(Wentao Xu, et al.) | Alpha360 | 0.0480±0.00 | 0.3589±0.02 | 0.0606±0.00 | 0.4773±0.01 | 0.0946±0.02 | 1.3509±0.25 | -0.0716±0.02 |
| HIST(Wentao Xu, et al.) | Alpha360 | 0.0522±0.00 | 0.3530±0.01 | 0.0667±0.00 | 0.4576±0.01 | 0.0987±0.02 | 1.3726±0.27 | -0.0681±0.01 |
| KRNN | Alpha360 | 0.0173±0.01 | 0.1210±0.06 | 0.0270±0.01 | 0.2018±0.04 | -0.0465±0.05 | -0.5415±0.62 | -0.2919±0.13 |
| Sandwich | Alpha360 | 0.0258±0.00 | 0.1924±0.04 | 0.0337±0.00 | 0.2624±0.03 | 0.0005±0.03 | 0.0001±0.33 | -0.1752±0.05 |

- The selected 20 features are based on the feature importance of a lightgbm-based model.
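The IC-style columns in the table are the mean daily cross-sectional correlation between model scores and realized labels (Pearson for IC, Spearman for Rank IC). A rough sketch, assuming both Series carry the (datetime, instrument) MultiIndex produced by the models' predict():

# Sketch: daily IC / Rank IC underlying the table above.
import pandas as pd

def daily_ic(pred: pd.Series, label: pd.Series, rank: bool = False) -> pd.Series:
    df = pd.DataFrame({"pred": pred, "label": label}).dropna()
    method = "spearman" if rank else "pearson"
    return df.groupby(level="datetime").apply(lambda g: g["pred"].corr(g["label"], method=method))

# IC is the mean of the daily series; ICIR is its mean divided by its std.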
examples/benchmarks/Sandwich/README.md
@@ -0,0 +1,8 @@
# Sandwich
* Code: [https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py](https://github.com/microsoft/FOST/blob/main/fostool/model/sandwich.py)

# Introduction to the settings/configs
* Torch_geometric is used in the original FOST model, but we do not use it here.
* Make sure your CUDA version matches the torch version so that the GPU can be used; we use CUDA==10.2 and torch.__version__==1.12.1.
examples/benchmarks/Sandwich/requirements.txt
@@ -0,0 +1,2 @@
numpy==1.23.4
pandas==1.5.2
examples/benchmarks/Sandwich/workflow_config_sandwich_Alpha360.yaml
@@ -0,0 +1,93 @@
qlib_init:
    provider_uri: "~/.qlib/qlib_data/cn_data"
    region: cn
market: &market csi300
benchmark: &benchmark SH000300
data_handler_config: &data_handler_config
    start_time: 2008-01-01
    end_time: 2020-08-01
    fit_start_time: 2008-01-01
    fit_end_time: 2014-12-31
    instruments: *market
    infer_processors:
        - class: RobustZScoreNorm
          kwargs:
              fields_group: feature
              clip_outlier: true
        - class: Fillna
          kwargs:
              fields_group: feature
    learn_processors:
        - class: DropnaLabel
        - class: CSRankNorm
          kwargs:
              fields_group: label
    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
    strategy:
        class: TopkDropoutStrategy
        module_path: qlib.contrib.strategy
        kwargs:
            signal:
                - <MODEL>
                - <DATASET>
            topk: 50
            n_drop: 5
    backtest:
        start_time: 2017-01-01
        end_time: 2020-08-01
        account: 100000000
        benchmark: *benchmark
        exchange_kwargs:
            limit_threshold: 0.095
            deal_price: close
            open_cost: 0.0005
            close_cost: 0.0015
            min_cost: 5
task:
    model:
        class: Sandwich
        module_path: qlib.contrib.model.pytorch_sandwich
        kwargs:
            fea_dim: 6
            cnn_dim_1: 16
            cnn_dim_2: 16
            cnn_kernel_size: 3
            rnn_dim_1: 8
            rnn_dim_2: 8
            rnn_dups: 2
            rnn_layers: 2
            n_epochs: 200
            lr: 0.001
            early_stop: 20
            batch_size: 2000
            metric: loss
            GPU: 0
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
        kwargs:
            handler:
                class: Alpha360
                module_path: qlib.contrib.data.handler
                kwargs: *data_handler_config
            segments:
                train: [2008-01-01, 2014-12-31]
                valid: [2015-01-01, 2016-12-31]
                test: [2017-01-01, 2020-08-01]
    record:
        - class: SignalRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              model: <MODEL>
              dataset: <DATASET>
        - class: SigAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              ana_long_short: False
              ann_scaler: 252
        - class: PortAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs:
              config: *port_analysis_config
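One detail shared by both configs: the label "Ref($close, -2) / Ref($close, -1) - 1" is the close-to-close return from day t+1 to day t+2, leaving one day to enter the position without lookahead. The same expression in plain pandas (Ref($close, -n) corresponds to shift(-n)):

# Sketch of the label expression: the return from t+1's close to t+2's close.
import pandas as pd

close = pd.Series([10.0, 10.5, 10.3, 10.8, 11.0])
label = close.shift(-2) / close.shift(-1) - 1
print(label)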
qlib/contrib/model/pytorch_krnn.py
@@ -0,0 +1,511 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.


from __future__ import division
from __future__ import print_function

import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import get_or_create_path
from ...log import get_module_logger

import torch
import torch.nn as nn
import torch.optim as optim

from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP

########################################################################
########################################################################
########################################################################


class CNNEncoderBase(nn.Module):
    def __init__(self, input_dim, output_dim, kernel_size, device):
        """Build a basic CNN encoder

        Parameters
        ----------
        input_dim : int
            The input dimension
        output_dim : int
            The output dimension
        kernel_size : int
            The size of convolutional kernels
        """
        super().__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.kernel_size = kernel_size
        self.device = device

        # set padding to ensure the same length
        # it is correct only when kernel_size is odd, dilation is 1, stride is 1
        self.conv = nn.Conv1d(input_dim, output_dim, kernel_size, padding=(kernel_size - 1) // 2)

    def forward(self, x):
        """
        Parameters
        ----------
        x : torch.Tensor
            input data

        Returns
        -------
        torch.Tensor
            Updated representations
        """
        # x comes in flattened: [batch_size, seq_len * input_dim]
        # reshape to [batch_size, seq_len, input_dim], then move channels first for Conv1d
        x = x.view(x.shape[0], -1, self.input_dim).permute(0, 2, 1).to(self.device)
        y = self.conv(x)  # [batch_size, output_dim, conved_seq_len]
        y = y.permute(0, 2, 1)  # [batch_size, conved_seq_len, output_dim]

        return y


class KRNNEncoderBase(nn.Module):
    def __init__(self, input_dim, output_dim, dup_num, rnn_layers, dropout, device):
        """Build K parallel RNNs

        Parameters
        ----------
        input_dim : int
            The input dimension
        output_dim : int
            The output dimension
        dup_num : int
            The number of parallel RNNs
        rnn_layers: int
            The number of RNN layers
        """
        super().__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dup_num = dup_num
        self.rnn_layers = rnn_layers
        self.dropout = dropout
        self.device = device

        self.rnn_modules = nn.ModuleList()
        for _ in range(dup_num):
            self.rnn_modules.append(nn.GRU(input_dim, output_dim, num_layers=self.rnn_layers, dropout=dropout))

    def forward(self, x):
        """
        Parameters
        ----------
        x : torch.Tensor
            Input data

        Returns
        -------
        torch.Tensor
            Updated representations
        """
        # input shape: [batch_size, seq_len, input_dim]
        # output shape: [batch_size, seq_len, output_dim]
        batch_size, seq_len, input_dim = x.shape
        x = x.permute(1, 0, 2).to(self.device)  # [seq_len, batch_size, input_dim]

        hids = []
        for rnn in self.rnn_modules:
            h, _ = rnn(x)  # [seq_len, batch_size, output_dim]
            hids.append(h)
        # average the K parallel RNNs: [seq_len, batch_size, output_dim, dup_num]
        hids = torch.stack(hids, dim=-1)
        hids = hids.view(seq_len, batch_size, self.output_dim, self.dup_num)
        hids = hids.mean(dim=3)
        hids = hids.permute(1, 0, 2)

        return hids


class CNNKRNNEncoder(nn.Module):
    def __init__(
        self, cnn_input_dim, cnn_output_dim, cnn_kernel_size, rnn_output_dim, rnn_dup_num, rnn_layers, dropout, device
    ):
        """Build an encoder composed of CNN and KRNN

        Parameters
        ----------
        cnn_input_dim : int
            The input dimension of CNN
        cnn_output_dim : int
            The output dimension of CNN
        cnn_kernel_size : int
            The size of convolutional kernels
        rnn_output_dim : int
            The output dimension of KRNN
        rnn_dup_num : int
            The number of parallel duplicates for KRNN
        rnn_layers : int
            The number of RNN layers
        """
        super().__init__()

        self.cnn_encoder = CNNEncoderBase(cnn_input_dim, cnn_output_dim, cnn_kernel_size, device)
        self.krnn_encoder = KRNNEncoderBase(cnn_output_dim, rnn_output_dim, rnn_dup_num, rnn_layers, dropout, device)

    def forward(self, x):
        """
        Parameters
        ----------
        x : torch.Tensor
            Input data

        Returns
        -------
        torch.Tensor
            Updated representations
        """
        cnn_out = self.cnn_encoder(x)
        krnn_out = self.krnn_encoder(cnn_out)

        return krnn_out


class KRNNModel(nn.Module):
    def __init__(self, fea_dim, cnn_dim, cnn_kernel_size, rnn_dim, rnn_dups, rnn_layers, dropout, device, **params):
        """Build a KRNN model

        Parameters
        ----------
        fea_dim : int
            The feature dimension
        cnn_dim : int
            The hidden dimension of CNN
        cnn_kernel_size : int
            The size of convolutional kernels
        rnn_dim : int
            The hidden dimension of KRNN
        rnn_dups : int
            The number of parallel duplicates
        rnn_layers: int
            The number of RNN layers
        """
        super().__init__()

        self.encoder = CNNKRNNEncoder(
            cnn_input_dim=fea_dim,
            cnn_output_dim=cnn_dim,
            cnn_kernel_size=cnn_kernel_size,
            rnn_output_dim=rnn_dim,
            rnn_dup_num=rnn_dups,
            rnn_layers=rnn_layers,
            dropout=dropout,
            device=device,
        )

        self.out_fc = nn.Linear(rnn_dim, 1)
        self.device = device

    def forward(self, x):
        # x: [batch_size, seq_len * input_dim]
        encode = self.encoder(x)
        # predict from the representation of the last time step
        out = self.out_fc(encode[:, -1, :]).squeeze().to(self.device)

        return out


class KRNN(Model):
    """KRNN Model

    Parameters
    ----------
    d_feat : int
        input dimension for each time step
    metric: str
        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
        the GPU ID(s) used for training
    """

    def __init__(
        self,
        fea_dim=6,
        cnn_dim=64,
        cnn_kernel_size=3,
        rnn_dim=64,
        rnn_dups=3,
        rnn_layers=2,
        dropout=0,
        n_epochs=200,
        lr=0.001,
        metric="",
        batch_size=2000,
        early_stop=20,
        loss="mse",
        optimizer="adam",
        GPU=0,
        seed=None,
        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("KRNN")
        self.logger.info("KRNN pytorch version...")

        # set hyper-parameters.
        self.fea_dim = fea_dim
        self.cnn_dim = cnn_dim
        self.cnn_kernel_size = cnn_kernel_size
        self.rnn_dim = rnn_dim
        self.rnn_dups = rnn_dups
        self.rnn_layers = rnn_layers
        self.dropout = dropout
        self.n_epochs = n_epochs
        self.lr = lr
        self.metric = metric
        self.batch_size = batch_size
        self.early_stop = early_stop
        self.optimizer = optimizer.lower()
        self.loss = loss
        self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
        self.seed = seed

        self.logger.info(
            "KRNN parameters setting:"
            "\nfea_dim : {}"
            "\ncnn_dim : {}"
            "\ncnn_kernel_size : {}"
            "\nrnn_dim : {}"
            "\nrnn_dups : {}"
            "\nrnn_layers : {}"
            "\ndropout : {}"
            "\nn_epochs : {}"
            "\nlr : {}"
            "\nmetric : {}"
            "\nbatch_size: {}"
            "\nearly_stop : {}"
            "\noptimizer : {}"
            "\nloss_type : {}"
            "\nvisible_GPU : {}"
            "\nuse_GPU : {}"
            "\nseed : {}".format(
                fea_dim,
                cnn_dim,
                cnn_kernel_size,
                rnn_dim,
                rnn_dups,
                rnn_layers,
                dropout,
                n_epochs,
                lr,
                metric,
                batch_size,
                early_stop,
                optimizer.lower(),
                loss,
                GPU,
                self.use_gpu,
                seed,
            )
        )

        if self.seed is not None:
            np.random.seed(self.seed)
            torch.manual_seed(self.seed)

        self.krnn_model = KRNNModel(
            fea_dim=self.fea_dim,
            cnn_dim=self.cnn_dim,
            cnn_kernel_size=self.cnn_kernel_size,
            rnn_dim=self.rnn_dim,
            rnn_dups=self.rnn_dups,
            rnn_layers=self.rnn_layers,
            dropout=self.dropout,
            device=self.device,
        )
        if optimizer.lower() == "adam":
            self.train_optimizer = optim.Adam(self.krnn_model.parameters(), lr=self.lr)
        elif optimizer.lower() == "gd":
            self.train_optimizer = optim.SGD(self.krnn_model.parameters(), lr=self.lr)
        else:
            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))

        self.fitted = False
        self.krnn_model.to(self.device)

    @property
    def use_gpu(self):
        return self.device != torch.device("cpu")

    def mse(self, pred, label):
        loss = (pred - label) ** 2
        return torch.mean(loss)

    def loss_fn(self, pred, label):
        mask = ~torch.isnan(label)

        if self.loss == "mse":
            return self.mse(pred[mask], label[mask])

        raise ValueError("unknown loss `%s`" % self.loss)

    def metric_fn(self, pred, label):
        mask = torch.isfinite(label)

        if self.metric in ("", "loss"):
            return -self.loss_fn(pred[mask], label[mask])

        raise ValueError("unknown metric `%s`" % self.metric)

    def get_daily_inter(self, df, shuffle=False):
        # organize the train data into daily batches
        daily_count = df.groupby(level=0).size().values
        daily_index = np.roll(np.cumsum(daily_count), 1)
        daily_index[0] = 0
        if shuffle:
            # shuffle data
            daily_shuffle = list(zip(daily_index, daily_count))
            np.random.shuffle(daily_shuffle)
            daily_index, daily_count = zip(*daily_shuffle)
        return daily_index, daily_count

    def train_epoch(self, x_train, y_train):
        x_train_values = x_train.values
        y_train_values = np.squeeze(y_train.values)
        self.krnn_model.train()

        indices = np.arange(len(x_train_values))
        np.random.shuffle(indices)

        for i in range(len(indices))[:: self.batch_size]:
            if len(indices) - i < self.batch_size:
                break

            feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
            label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device)

            pred = self.krnn_model(feature)
            loss = self.loss_fn(pred, label)

            self.train_optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(self.krnn_model.parameters(), 3.0)
            self.train_optimizer.step()

    def test_epoch(self, data_x, data_y):
        # prepare training data
        x_values = data_x.values
        y_values = np.squeeze(data_y.values)

        self.krnn_model.eval()

        scores = []
        losses = []

        indices = np.arange(len(x_values))

        for i in range(len(indices))[:: self.batch_size]:
            if len(indices) - i < self.batch_size:
                break

            feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
            label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)

            pred = self.krnn_model(feature)
            loss = self.loss_fn(pred, label)
            losses.append(loss.item())

            score = self.metric_fn(pred, label)
            scores.append(score.item())

        return np.mean(losses), np.mean(scores)

    def fit(
        self,
        dataset: DatasetH,
        evals_result=dict(),
        save_path=None,
    ):
        df_train, df_valid, df_test = dataset.prepare(
            ["train", "valid", "test"],
            col_set=["feature", "label"],
            data_key=DataHandlerLP.DK_L,
        )
        if df_train.empty or df_valid.empty:
            raise ValueError("Empty data from dataset, please check your dataset config.")

        x_train, y_train = df_train["feature"], df_train["label"]
        x_valid, y_valid = df_valid["feature"], df_valid["label"]

        save_path = get_or_create_path(save_path)
        stop_steps = 0
        train_loss = 0
        best_score = -np.inf
        best_epoch = 0
        evals_result["train"] = []
        evals_result["valid"] = []

        # train
        self.logger.info("training...")
        self.fitted = True

        for step in range(self.n_epochs):
            self.logger.info("Epoch%d:", step)
            self.logger.info("training...")
            self.train_epoch(x_train, y_train)
            self.logger.info("evaluating...")
            train_loss, train_score = self.test_epoch(x_train, y_train)
            val_loss, val_score = self.test_epoch(x_valid, y_valid)
            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
            evals_result["train"].append(train_score)
            evals_result["valid"].append(val_score)

            if val_score > best_score:
                best_score = val_score
                stop_steps = 0
                best_epoch = step
                best_param = copy.deepcopy(self.krnn_model.state_dict())
            else:
                stop_steps += 1
                if stop_steps >= self.early_stop:
                    self.logger.info("early stop")
                    break

        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
        self.krnn_model.load_state_dict(best_param)
        torch.save(best_param, save_path)

        if self.use_gpu:
            torch.cuda.empty_cache()

    def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
        if not self.fitted:
            raise ValueError("model is not fitted yet!")

        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
        index = x_test.index
        self.krnn_model.eval()
        x_values = x_test.values
        sample_num = x_values.shape[0]
        preds = []

        for begin in range(sample_num)[:: self.batch_size]:
            if sample_num - begin < self.batch_size:
                end = sample_num
            else:
                end = begin + self.batch_size
            x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
            with torch.no_grad():
                pred = self.krnn_model(x_batch).detach().cpu().numpy()
            preds.append(pred)

        return pd.Series(np.concatenate(preds), index=index)
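A minimal shape check of the model above (random tensors stand in for Alpha360 features; the hyper-parameters mirror workflow_config_krnn_Alpha360.yaml):

# Sketch: KRNN expects features flattened as [batch, seq_len * fea_dim]
# (Alpha360 yields 60 time steps x 6 raw fields) and emits one score per sample.
import torch
from qlib.contrib.model.pytorch_krnn import KRNNModel

device = torch.device("cpu")
model = KRNNModel(
    fea_dim=6, cnn_dim=8, cnn_kernel_size=3,
    rnn_dim=8, rnn_dups=2, rnn_layers=2, dropout=0.0, device=device,
)
x = torch.randn(32, 60 * 6)  # 32 samples, 60 time steps, 6 features each
print(model(x).shape)        # torch.Size([32])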
qlib/contrib/model/pytorch_sandwich.py
@@ -0,0 +1,376 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.


from __future__ import division
from __future__ import print_function

import numpy as np
import pandas as pd
from typing import Text, Union
import copy
from ...utils import get_or_create_path
from ...log import get_module_logger

import torch
import torch.nn as nn
import torch.optim as optim

from ...model.base import Model
from ...data.dataset import DatasetH
from ...data.dataset.handler import DataHandlerLP
from .pytorch_krnn import CNNKRNNEncoder


class SandwichModel(nn.Module):
    def __init__(
        self,
        fea_dim,
        cnn_dim_1,
        cnn_dim_2,
        cnn_kernel_size,
        rnn_dim_1,
        rnn_dim_2,
        rnn_dups,
        rnn_layers,
        dropout,
        device,
        **params
    ):
        """Build a Sandwich model

        Parameters
        ----------
        fea_dim : int
            The feature dimension
        cnn_dim_1 : int
            The hidden dimension of the first CNN
        cnn_dim_2 : int
            The hidden dimension of the second CNN
        cnn_kernel_size : int
            The size of convolutional kernels
        rnn_dim_1 : int
            The hidden dimension of the first KRNN
        rnn_dim_2 : int
            The hidden dimension of the second KRNN
        rnn_dups : int
            The number of parallel duplicates
        rnn_layers: int
            The number of RNN layers
        """
        super().__init__()

        self.first_encoder = CNNKRNNEncoder(
            cnn_input_dim=fea_dim,
            cnn_output_dim=cnn_dim_1,
            cnn_kernel_size=cnn_kernel_size,
            rnn_output_dim=rnn_dim_1,
            rnn_dup_num=rnn_dups,
            rnn_layers=rnn_layers,
            dropout=dropout,
            device=device,
        )

        # the second stage consumes the first stage's RNN output
        self.second_encoder = CNNKRNNEncoder(
            cnn_input_dim=rnn_dim_1,
            cnn_output_dim=cnn_dim_2,
            cnn_kernel_size=cnn_kernel_size,
            rnn_output_dim=rnn_dim_2,
            rnn_dup_num=rnn_dups,
            rnn_layers=rnn_layers,
            dropout=dropout,
            device=device,
        )

        self.out_fc = nn.Linear(rnn_dim_2, 1)
        self.device = device

    def forward(self, x):
        # x: [batch_size, seq_len * input_dim]
        encode = self.first_encoder(x)
        encode = self.second_encoder(encode)
        out = self.out_fc(encode[:, -1, :]).squeeze().to(self.device)

        return out


class Sandwich(Model):
    """Sandwich Model

    Parameters
    ----------
    d_feat : int
        input dimension for each time step
    metric: str
        the evaluation metric used in early stop
    optimizer : str
        optimizer name
    GPU : str
        the GPU ID(s) used for training
    """

    def __init__(
        self,
        fea_dim=6,
        cnn_dim_1=64,
        cnn_dim_2=32,
        cnn_kernel_size=3,
        rnn_dim_1=16,
        rnn_dim_2=8,
        rnn_dups=3,
        rnn_layers=2,
        dropout=0,
        n_epochs=200,
        lr=0.001,
        metric="",
        batch_size=2000,
        early_stop=20,
        loss="mse",
        optimizer="adam",
        GPU=0,
        seed=None,
        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("Sandwich")
        self.logger.info("Sandwich pytorch version...")

        # set hyper-parameters.
        self.fea_dim = fea_dim
        self.cnn_dim_1 = cnn_dim_1
        self.cnn_dim_2 = cnn_dim_2
        self.cnn_kernel_size = cnn_kernel_size
        self.rnn_dim_1 = rnn_dim_1
        self.rnn_dim_2 = rnn_dim_2
        self.rnn_dups = rnn_dups
        self.rnn_layers = rnn_layers
        self.dropout = dropout
        self.n_epochs = n_epochs
        self.lr = lr
        self.metric = metric
        self.batch_size = batch_size
        self.early_stop = early_stop
        self.optimizer = optimizer.lower()
        self.loss = loss
        self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
        self.seed = seed

        self.logger.info(
            "Sandwich parameters setting:"
            "\nfea_dim : {}"
            "\ncnn_dim_1 : {}"
            "\ncnn_dim_2 : {}"
            "\ncnn_kernel_size : {}"
            "\nrnn_dim_1 : {}"
            "\nrnn_dim_2 : {}"
            "\nrnn_dups : {}"
            "\nrnn_layers : {}"
            "\ndropout : {}"
            "\nn_epochs : {}"
            "\nlr : {}"
            "\nmetric : {}"
            "\nbatch_size: {}"
            "\nearly_stop : {}"
            "\noptimizer : {}"
            "\nloss_type : {}"
            "\nvisible_GPU : {}"
            "\nuse_GPU : {}"
            "\nseed : {}".format(
                fea_dim,
                cnn_dim_1,
                cnn_dim_2,
                cnn_kernel_size,
                rnn_dim_1,
                rnn_dim_2,
                rnn_dups,
                rnn_layers,
                dropout,
                n_epochs,
                lr,
                metric,
                batch_size,
                early_stop,
                optimizer.lower(),
                loss,
                GPU,
                self.use_gpu,
                seed,
            )
        )

        if self.seed is not None:
            np.random.seed(self.seed)
            torch.manual_seed(self.seed)

        self.sandwich_model = SandwichModel(
            fea_dim=self.fea_dim,
            cnn_dim_1=self.cnn_dim_1,
            cnn_dim_2=self.cnn_dim_2,
            cnn_kernel_size=self.cnn_kernel_size,
            rnn_dim_1=self.rnn_dim_1,
            rnn_dim_2=self.rnn_dim_2,
            rnn_dups=self.rnn_dups,
            rnn_layers=self.rnn_layers,
            dropout=self.dropout,
            device=self.device,
        )
        if optimizer.lower() == "adam":
            self.train_optimizer = optim.Adam(self.sandwich_model.parameters(), lr=self.lr)
        elif optimizer.lower() == "gd":
            self.train_optimizer = optim.SGD(self.sandwich_model.parameters(), lr=self.lr)
        else:
            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))

        self.fitted = False
        self.sandwich_model.to(self.device)

    @property
    def use_gpu(self):
        return self.device != torch.device("cpu")

    def mse(self, pred, label):
        loss = (pred - label) ** 2
        return torch.mean(loss)

    def loss_fn(self, pred, label):
        mask = ~torch.isnan(label)

        if self.loss == "mse":
            return self.mse(pred[mask], label[mask])

        raise ValueError("unknown loss `%s`" % self.loss)

    def metric_fn(self, pred, label):
        mask = torch.isfinite(label)

        if self.metric in ("", "loss"):
            return -self.loss_fn(pred[mask], label[mask])

        raise ValueError("unknown metric `%s`" % self.metric)

    def train_epoch(self, x_train, y_train):
        x_train_values = x_train.values
        y_train_values = np.squeeze(y_train.values)
        self.sandwich_model.train()

        indices = np.arange(len(x_train_values))
        np.random.shuffle(indices)

        for i in range(len(indices))[:: self.batch_size]:
            if len(indices) - i < self.batch_size:
                break

            feature = torch.from_numpy(x_train_values[indices[i : i + self.batch_size]]).float().to(self.device)
            label = torch.from_numpy(y_train_values[indices[i : i + self.batch_size]]).float().to(self.device)

            pred = self.sandwich_model(feature)
            loss = self.loss_fn(pred, label)

            self.train_optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(self.sandwich_model.parameters(), 3.0)
            self.train_optimizer.step()

    def test_epoch(self, data_x, data_y):
        # prepare training data
        x_values = data_x.values
        y_values = np.squeeze(data_y.values)

        self.sandwich_model.eval()

        scores = []
        losses = []

        indices = np.arange(len(x_values))

        for i in range(len(indices))[:: self.batch_size]:
            if len(indices) - i < self.batch_size:
                break

            feature = torch.from_numpy(x_values[indices[i : i + self.batch_size]]).float().to(self.device)
            label = torch.from_numpy(y_values[indices[i : i + self.batch_size]]).float().to(self.device)

            pred = self.sandwich_model(feature)
            loss = self.loss_fn(pred, label)
            losses.append(loss.item())

            score = self.metric_fn(pred, label)
            scores.append(score.item())

        return np.mean(losses), np.mean(scores)

    def fit(
        self, dataset: DatasetH, evals_result=dict(), save_path=None,
    ):
        df_train, df_valid, df_test = dataset.prepare(
            ["train", "valid", "test"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L,
        )
        if df_train.empty or df_valid.empty:
            raise ValueError("Empty data from dataset, please check your dataset config.")

        x_train, y_train = df_train["feature"], df_train["label"]
        x_valid, y_valid = df_valid["feature"], df_valid["label"]

        save_path = get_or_create_path(save_path)
        stop_steps = 0
        train_loss = 0
        best_score = -np.inf
        best_epoch = 0
        evals_result["train"] = []
        evals_result["valid"] = []

        # train
        self.logger.info("training...")
        self.fitted = True

        for step in range(self.n_epochs):
            self.logger.info("Epoch%d:", step)
            self.logger.info("training...")
            self.train_epoch(x_train, y_train)
            self.logger.info("evaluating...")
            train_loss, train_score = self.test_epoch(x_train, y_train)
            val_loss, val_score = self.test_epoch(x_valid, y_valid)
            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
            evals_result["train"].append(train_score)
            evals_result["valid"].append(val_score)

            if val_score > best_score:
                best_score = val_score
                stop_steps = 0
                best_epoch = step
                best_param = copy.deepcopy(self.sandwich_model.state_dict())
            else:
                stop_steps += 1
                if stop_steps >= self.early_stop:
                    self.logger.info("early stop")
                    break

        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
        self.sandwich_model.load_state_dict(best_param)
        torch.save(best_param, save_path)

        if self.use_gpu:
            torch.cuda.empty_cache()

    def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
        if not self.fitted:
            raise ValueError("model is not fitted yet!")

        x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
        index = x_test.index
        self.sandwich_model.eval()
        x_values = x_test.values
        sample_num = x_values.shape[0]
        preds = []

        for begin in range(sample_num)[:: self.batch_size]:
            if sample_num - begin < self.batch_size:
                end = sample_num
            else:
                end = begin + self.batch_size
            x_batch = torch.from_numpy(x_values[begin:end]).float().to(self.device)
            with torch.no_grad():
                pred = self.sandwich_model(x_batch).detach().cpu().numpy()
            preds.append(pred)

        return pd.Series(np.concatenate(preds), index=index)
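The analogous shape check for Sandwich; note that rnn_dim_1 of the first encoder becomes cnn_input_dim of the second, which is what makes the two CNNKRNNEncoder stages stackable:

# Sketch: Sandwich chains two CNN+KRNN stages end to end.
import torch
from qlib.contrib.model.pytorch_sandwich import SandwichModel

device = torch.device("cpu")
model = SandwichModel(
    fea_dim=6, cnn_dim_1=16, cnn_dim_2=16, cnn_kernel_size=3,
    rnn_dim_1=8, rnn_dim_2=8, rnn_dups=2, rnn_layers=2, dropout=0.0, device=device,
)
x = torch.randn(32, 60 * 6)  # same Alpha360-style flattened input
print(model(x).shape)        # torch.Size([32])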