зеркало из https://github.com/microsoft/qlib.git
Adjust rolling api (#1594)
* Intermediate version * Fix yaml template & Successfully run rolling * Be compatible with benchmark * Get same results with previous linear model * Black formatting * Update black * Update the placeholder mechanism * Update CI * Update CI * Upgrade Black * Fix CI and simplify code * Fix CI * Move the data processing caching mechanism into utils. * Adjusting DDG-DA * Organize import
This commit is contained in:
Родитель
8d3adf34ac
Коммит
be4646b4b7
|
@ -38,7 +38,7 @@ jobs:
|
||||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||||
run: |
|
run: |
|
||||||
twine upload dist/*
|
twine upload dist/*
|
||||||
|
|
||||||
deploy_with_manylinux:
|
deploy_with_manylinux:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|
|
@ -8,6 +8,7 @@ on:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
if: ${{ false }} # FIXME: temporarily disable... Due to we are rushing a feature
|
||||||
timeout-minutes: 120
|
timeout-minutes: 120
|
||||||
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
|
@ -64,7 +64,10 @@ jobs:
|
||||||
python -m pip install -e .[dev]
|
python -m pip install -e .[dev]
|
||||||
|
|
||||||
- name: Lint with Black
|
- name: Lint with Black
|
||||||
|
# Python 3.7 will use a black with low level. So we use python with higher version for black check
|
||||||
|
if: (matrix.python-version != '3.7')
|
||||||
run: |
|
run: |
|
||||||
|
pip install -U black # follow the latest version of black, previous Qlib dependency will downgrade black
|
||||||
black . -l 120 --check --diff
|
black . -l 120 --check --diff
|
||||||
|
|
||||||
- name: Make html with sphinx
|
- name: Make html with sphinx
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/psf/black
|
- repo: https://github.com/psf/black
|
||||||
rev: 22.6.0
|
rev: 23.7.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
args: ["qlib", "-l 120"]
|
args: ["qlib", "-l 120"]
|
||||||
|
@ -9,4 +9,4 @@ repos:
|
||||||
rev: 4.0.1
|
rev: 4.0.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]
|
args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]
|
||||||
|
|
|
@ -53,9 +53,7 @@ Below is a typical config file of ``qrun``.
|
||||||
kwargs:
|
kwargs:
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
backtest:
|
backtest:
|
||||||
limit_threshold: 0.095
|
limit_threshold: 0.095
|
||||||
account: 100000000
|
account: 100000000
|
||||||
|
@ -281,9 +279,7 @@ The following script is the configuration of `backtest` and the `strategy` used
|
||||||
kwargs:
|
kwargs:
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
backtest:
|
backtest:
|
||||||
limit_threshold: 0.095
|
limit_threshold: 0.095
|
||||||
account: 100000000
|
account: 100000000
|
||||||
|
|
|
@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
model: <MODEL>
|
signal: <PRED>
|
||||||
dataset: <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -35,9 +35,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
@ -89,4 +87,4 @@ task:
|
||||||
- class: PortAnaRecord
|
- class: PortAnaRecord
|
||||||
module_path: qlib.workflow.record_temp
|
module_path: qlib.workflow.record_temp
|
||||||
kwargs:
|
kwargs:
|
||||||
config: *port_analysis_config
|
config: *port_analysis_config
|
||||||
|
|
|
@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
model: <MODEL>
|
signal: <PRED>
|
||||||
dataset: <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -48,7 +48,6 @@ class Avg15minHandler(DataHandlerLP):
|
||||||
)
|
)
|
||||||
|
|
||||||
def loader_config(self):
|
def loader_config(self):
|
||||||
|
|
||||||
# Results for dataset: df: pd.DataFrame
|
# Results for dataset: df: pd.DataFrame
|
||||||
# len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
|
# len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
|
||||||
# df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16
|
# df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16
|
||||||
|
|
|
@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
model: <MODEL>
|
signal: <PRED>
|
||||||
dataset: <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
model: <MODEL>
|
signal: <PRED>
|
||||||
dataset: <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -33,9 +33,7 @@ port_analysis_config: &port_analysis_config
|
||||||
kwargs:
|
kwargs:
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
backtest:
|
backtest:
|
||||||
verbose: False
|
verbose: False
|
||||||
limit_threshold: 0.095
|
limit_threshold: 0.095
|
||||||
|
|
|
@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -31,9 +31,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -36,8 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
model: <MODEL>
|
signal: <PRED>
|
||||||
dataset: <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
model: <MODEL>
|
signal: <PRED>
|
||||||
dataset: <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -30,9 +30,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
@ -95,4 +93,4 @@ task:
|
||||||
- class: PortAnaRecord
|
- class: PortAnaRecord
|
||||||
module_path: qlib.workflow.record_temp
|
module_path: qlib.workflow.record_temp
|
||||||
kwargs:
|
kwargs:
|
||||||
config: *port_analysis_config
|
config: *port_analysis_config
|
||||||
|
|
|
@ -139,7 +139,6 @@ class GenericDataFormatter(abc.ABC):
|
||||||
# Sanity checks first.
|
# Sanity checks first.
|
||||||
# Ensure only one ID and time column exist
|
# Ensure only one ID and time column exist
|
||||||
def _check_single_column(input_type):
|
def _check_single_column(input_type):
|
||||||
|
|
||||||
length = len([tup for tup in column_definition if tup[2] == input_type])
|
length = len([tup for tup in column_definition if tup[2] == input_type])
|
||||||
|
|
||||||
if length != 1:
|
if length != 1:
|
||||||
|
|
|
@ -78,7 +78,6 @@ class ExperimentConfig:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hyperparam_iterations(self):
|
def hyperparam_iterations(self):
|
||||||
|
|
||||||
return 240 if self.experiment == "volatility" else 60
|
return 240 if self.experiment == "volatility" else 60
|
||||||
|
|
||||||
def make_data_formatter(self):
|
def make_data_formatter(self):
|
||||||
|
|
|
@ -88,7 +88,6 @@ class HyperparamOptManager:
|
||||||
params_file = os.path.join(self.hyperparam_folder, "params.csv")
|
params_file = os.path.join(self.hyperparam_folder, "params.csv")
|
||||||
|
|
||||||
if os.path.exists(results_file) and os.path.exists(params_file):
|
if os.path.exists(results_file) and os.path.exists(params_file):
|
||||||
|
|
||||||
self.results = pd.read_csv(results_file, index_col=0)
|
self.results = pd.read_csv(results_file, index_col=0)
|
||||||
self.saved_params = pd.read_csv(params_file, index_col=0)
|
self.saved_params = pd.read_csv(params_file, index_col=0)
|
||||||
|
|
||||||
|
@ -178,7 +177,6 @@ class HyperparamOptManager:
|
||||||
return parameters
|
return parameters
|
||||||
|
|
||||||
for _ in range(self._max_tries):
|
for _ in range(self._max_tries):
|
||||||
|
|
||||||
parameters = _get_next()
|
parameters = _get_next()
|
||||||
name = self._get_name(parameters)
|
name = self._get_name(parameters)
|
||||||
|
|
||||||
|
|
|
@ -475,7 +475,6 @@ class TemporalFusionTransformer:
|
||||||
|
|
||||||
embeddings = []
|
embeddings = []
|
||||||
for i in range(num_categorical_variables):
|
for i in range(num_categorical_variables):
|
||||||
|
|
||||||
embedding = tf.keras.Sequential(
|
embedding = tf.keras.Sequential(
|
||||||
[
|
[
|
||||||
tf.keras.layers.InputLayer([time_steps]),
|
tf.keras.layers.InputLayer([time_steps]),
|
||||||
|
@ -680,7 +679,6 @@ class TemporalFusionTransformer:
|
||||||
|
|
||||||
data_map = {}
|
data_map = {}
|
||||||
for _, sliced in data.groupby(id_col):
|
for _, sliced in data.groupby(id_col):
|
||||||
|
|
||||||
col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}
|
col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}
|
||||||
|
|
||||||
for k in col_mappings:
|
for k in col_mappings:
|
||||||
|
@ -954,7 +952,6 @@ class TemporalFusionTransformer:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
with tf.variable_scope(self.name):
|
with tf.variable_scope(self.name):
|
||||||
|
|
||||||
transformer_layer, all_inputs, attention_components = self._build_base_graph()
|
transformer_layer, all_inputs, attention_components = self._build_base_graph()
|
||||||
|
|
||||||
outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.output_size * len(self.quantiles)))(
|
outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.output_size * len(self.quantiles)))(
|
||||||
|
|
|
@ -16,9 +16,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -6,7 +6,6 @@ from qlib.utils import init_instance_by_config
|
||||||
|
|
||||||
|
|
||||||
def main(seed, config_file="configs/config_alstm.yaml"):
|
def main(seed, config_file="configs/config_alstm.yaml"):
|
||||||
|
|
||||||
# set random seed
|
# set random seed
|
||||||
with open(config_file) as f:
|
with open(config_file) as f:
|
||||||
config = yaml.safe_load(f)
|
config = yaml.safe_load(f)
|
||||||
|
@ -30,7 +29,6 @@ def main(seed, config_file="configs/config_alstm.yaml"):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
# set params from cmd
|
# set params from cmd
|
||||||
parser = argparse.ArgumentParser(allow_abbrev=False)
|
parser = argparse.ArgumentParser(allow_abbrev=False)
|
||||||
parser.add_argument("--seed", type=int, default=1000, help="random seed")
|
parser.add_argument("--seed", type=int, default=1000, help="random seed")
|
||||||
|
|
|
@ -96,7 +96,6 @@ class MTSDatasetH(DatasetH):
|
||||||
drop_last=False,
|
drop_last=False,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
|
||||||
assert horizon > 0, "please specify `horizon` to avoid data leakage"
|
assert horizon > 0, "please specify `horizon` to avoid data leakage"
|
||||||
|
|
||||||
self.seq_len = seq_len
|
self.seq_len = seq_len
|
||||||
|
@ -111,7 +110,6 @@ class MTSDatasetH(DatasetH):
|
||||||
super().__init__(handler, segments, **kwargs)
|
super().__init__(handler, segments, **kwargs)
|
||||||
|
|
||||||
def setup_data(self, handler_kwargs: dict = None, **kwargs):
|
def setup_data(self, handler_kwargs: dict = None, **kwargs):
|
||||||
|
|
||||||
super().setup_data()
|
super().setup_data()
|
||||||
|
|
||||||
# change index to <code, date>
|
# change index to <code, date>
|
||||||
|
|
|
@ -45,7 +45,6 @@ class TRAModel(Model):
|
||||||
avg_params=True,
|
avg_params=True,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
|
||||||
np.random.seed(seed)
|
np.random.seed(seed)
|
||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
|
|
||||||
|
@ -93,7 +92,6 @@ class TRAModel(Model):
|
||||||
self.global_step = -1
|
self.global_step = -1
|
||||||
|
|
||||||
def train_epoch(self, data_set):
|
def train_epoch(self, data_set):
|
||||||
|
|
||||||
self.model.train()
|
self.model.train()
|
||||||
self.tra.train()
|
self.tra.train()
|
||||||
|
|
||||||
|
@ -146,7 +144,6 @@ class TRAModel(Model):
|
||||||
return total_loss
|
return total_loss
|
||||||
|
|
||||||
def test_epoch(self, data_set, return_pred=False):
|
def test_epoch(self, data_set, return_pred=False):
|
||||||
|
|
||||||
self.model.eval()
|
self.model.eval()
|
||||||
self.tra.eval()
|
self.tra.eval()
|
||||||
data_set.eval()
|
data_set.eval()
|
||||||
|
@ -204,7 +201,6 @@ class TRAModel(Model):
|
||||||
return metrics, preds
|
return metrics, preds
|
||||||
|
|
||||||
def fit(self, dataset, evals_result=dict()):
|
def fit(self, dataset, evals_result=dict()):
|
||||||
|
|
||||||
train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
|
train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
|
||||||
|
|
||||||
best_score = -1
|
best_score = -1
|
||||||
|
@ -380,7 +376,6 @@ class LSTM(nn.Module):
|
||||||
self.output_size = hidden_size
|
self.output_size = hidden_size
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
|
|
||||||
x = self.input_drop(x)
|
x = self.input_drop(x)
|
||||||
|
|
||||||
if self.training and self.noise_level > 0:
|
if self.training and self.noise_level > 0:
|
||||||
|
@ -464,7 +459,6 @@ class Transformer(nn.Module):
|
||||||
self.output_size = hidden_size
|
self.output_size = hidden_size
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x):
|
||||||
|
|
||||||
x = self.input_drop(x)
|
x = self.input_drop(x)
|
||||||
|
|
||||||
if self.training and self.noise_level > 0:
|
if self.training and self.noise_level > 0:
|
||||||
|
@ -514,7 +508,6 @@ class TRA(nn.Module):
|
||||||
self.predictors = nn.Linear(input_size, num_states)
|
self.predictors = nn.Linear(input_size, num_states)
|
||||||
|
|
||||||
def forward(self, hidden, hist_loss):
|
def forward(self, hidden, hist_loss):
|
||||||
|
|
||||||
preds = self.predictors(hidden)
|
preds = self.predictors(hidden)
|
||||||
|
|
||||||
if self.num_states == 1:
|
if self.num_states == 1:
|
||||||
|
|
|
@ -57,9 +57,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -16,12 +16,12 @@ Though the dataset is different, the conclusion remains the same. By applying `D
|
||||||
# Run the Code
|
# Run the Code
|
||||||
Users can try `DDG-DA` by running the following command:
|
Users can try `DDG-DA` by running the following command:
|
||||||
```bash
|
```bash
|
||||||
python workflow.py run_all
|
python workflow.py run
|
||||||
```
|
```
|
||||||
|
|
||||||
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try `LightGBM` forecasting models by running the following command:
|
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try `LightGBM` forecasting models by running the following command:
|
||||||
```bash
|
```bash
|
||||||
python workflow.py --forecast_model="gbdt" run_all
|
python workflow.py --conf_path=../workflow_config_lightgbm_Alpha158.yaml run
|
||||||
```
|
```
|
||||||
|
|
||||||
# Results
|
# Results
|
||||||
|
|
|
@ -1,305 +1,40 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
# Copyright (c) Microsoft Corporation.
|
||||||
# Licensed under the MIT License.
|
# Licensed under the MIT License.
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from qlib.model.meta.task import MetaTask
|
from typing import Union
|
||||||
from qlib.contrib.meta.data_selection.model import MetaModelDS
|
|
||||||
from qlib.contrib.meta.data_selection.dataset import InternalData, MetaDatasetDS
|
|
||||||
from qlib.data.dataset.handler import DataHandlerLP
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import fire
|
import fire
|
||||||
import sys
|
|
||||||
import pickle
|
|
||||||
from typing import Optional
|
|
||||||
from qlib import auto_init
|
from qlib import auto_init
|
||||||
from qlib.model.trainer import TrainerR
|
from qlib.contrib.rolling.ddgda import DDGDA
|
||||||
from qlib.typehint import Literal
|
|
||||||
from qlib.utils import init_instance_by_config
|
|
||||||
from qlib.workflow import R
|
|
||||||
from qlib.tests.data import GetData
|
from qlib.tests.data import GetData
|
||||||
|
|
||||||
DIRNAME = Path(__file__).absolute().resolve().parent
|
DIRNAME = Path(__file__).absolute().resolve().parent
|
||||||
sys.path.append(str(DIRNAME.parent / "baseline"))
|
BENCH_DIR = DIRNAME.parent / "baseline"
|
||||||
from rolling_benchmark import RollingBenchmark # NOTE: sys.path is changed for import RollingBenchmark
|
|
||||||
|
|
||||||
|
|
||||||
class DDGDA:
|
class DDGDABench(DDGDA):
|
||||||
"""
|
# The config in the README.md
|
||||||
please run `python workflow.py run_all` to run the full workflow of the experiment
|
CONF_LIST = [
|
||||||
|
BENCH_DIR / "workflow_config_linear_Alpha158.yaml",
|
||||||
|
BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml",
|
||||||
|
]
|
||||||
|
|
||||||
**NOTE**
|
DEFAULT_CONF = CONF_LIST[0] # Linear by default due to efficiency
|
||||||
before running the example, please clean your previous results with following command
|
|
||||||
- `rm -r mlruns`
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
|
||||||
self,
|
# This code is for being compatible with the previous old code
|
||||||
sim_task_model: Literal["linear", "gbdt"] = "gbdt",
|
conf_path = Path(conf_path)
|
||||||
forecast_model: Literal["linear", "gbdt"] = "linear",
|
super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs)
|
||||||
h_path: Optional[str] = None,
|
|
||||||
test_end: Optional[str] = None,
|
|
||||||
train_start: Optional[str] = None,
|
|
||||||
meta_1st_train_end: Optional[str] = None,
|
|
||||||
task_ext_conf: Optional[dict] = None,
|
|
||||||
alpha: float = 0.01,
|
|
||||||
proxy_hd: str = "handler_proxy.pkl",
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
|
|
||||||
Parameters
|
for f in self.CONF_LIST:
|
||||||
----------
|
if conf_path.samefile(f):
|
||||||
|
break
|
||||||
train_start: Optional[str]
|
else:
|
||||||
the start datetime for data. It is used in training start time (for both tasks & meta learing)
|
self.logger.warning("Model type is not in the benchmark!")
|
||||||
test_end: Optional[str]
|
|
||||||
the end datetime for data. It is used in test end time
|
|
||||||
meta_1st_train_end: Optional[str]
|
|
||||||
the datetime of training end of the first meta_task
|
|
||||||
alpha: float
|
|
||||||
Setting the L2 regularization for ridge
|
|
||||||
The `alpha` is only passed to MetaModelDS (it is not passed to sim_task_model currently..)
|
|
||||||
"""
|
|
||||||
self.step = 20
|
|
||||||
# NOTE:
|
|
||||||
# the horizon must match the meaning in the base task template
|
|
||||||
self.horizon = 20
|
|
||||||
self.meta_exp_name = "DDG-DA"
|
|
||||||
self.sim_task_model = sim_task_model # The model to capture the distribution of data.
|
|
||||||
self.forecast_model = forecast_model # downstream forecasting models' type
|
|
||||||
self.rb_kwargs = {
|
|
||||||
"h_path": h_path,
|
|
||||||
"test_end": test_end,
|
|
||||||
"train_start": train_start,
|
|
||||||
"task_ext_conf": task_ext_conf,
|
|
||||||
}
|
|
||||||
self.alpha = alpha
|
|
||||||
self.meta_1st_train_end = meta_1st_train_end
|
|
||||||
self.proxy_hd = proxy_hd
|
|
||||||
|
|
||||||
def get_feature_importance(self):
|
|
||||||
# this must be lightGBM, because it needs to get the feature importance
|
|
||||||
rb = RollingBenchmark(model_type="gbdt", **self.rb_kwargs)
|
|
||||||
task = rb.basic_task()
|
|
||||||
|
|
||||||
with R.start(experiment_name="feature_importance"):
|
|
||||||
model = init_instance_by_config(task["model"])
|
|
||||||
dataset = init_instance_by_config(task["dataset"])
|
|
||||||
model.fit(dataset)
|
|
||||||
|
|
||||||
fi = model.get_feature_importance()
|
|
||||||
|
|
||||||
# Because the model use numpy instead of dataframe for training lightgbm
|
|
||||||
# So the we must use following extra steps to get the right feature importance
|
|
||||||
df = dataset.prepare(segments=slice(None), col_set="feature", data_key=DataHandlerLP.DK_R)
|
|
||||||
cols = df.columns
|
|
||||||
fi_named = {cols[int(k.split("_")[1])]: imp for k, imp in fi.to_dict().items()}
|
|
||||||
|
|
||||||
return pd.Series(fi_named)
|
|
||||||
|
|
||||||
def dump_data_for_proxy_model(self):
|
|
||||||
"""
|
|
||||||
Dump data for training meta model.
|
|
||||||
The meta model will be trained upon the proxy forecasting model.
|
|
||||||
This dataset is for the proxy forecasting model.
|
|
||||||
"""
|
|
||||||
topk = 30
|
|
||||||
fi = self.get_feature_importance()
|
|
||||||
col_selected = fi.nlargest(topk)
|
|
||||||
|
|
||||||
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
|
|
||||||
task = rb.basic_task()
|
|
||||||
dataset = init_instance_by_config(task["dataset"])
|
|
||||||
prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
|
||||||
|
|
||||||
feature_df = prep_ds["feature"]
|
|
||||||
label_df = prep_ds["label"]
|
|
||||||
|
|
||||||
feature_selected = feature_df.loc[:, col_selected.index]
|
|
||||||
|
|
||||||
feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
|
|
||||||
lambda df: (df - df.mean()).div(df.std())
|
|
||||||
)
|
|
||||||
feature_selected = feature_selected.fillna(0.0)
|
|
||||||
|
|
||||||
df_all = {
|
|
||||||
"label": label_df.reindex(feature_selected.index),
|
|
||||||
"feature": feature_selected,
|
|
||||||
}
|
|
||||||
df_all = pd.concat(df_all, axis=1)
|
|
||||||
df_all.to_pickle(DIRNAME / "fea_label_df.pkl")
|
|
||||||
|
|
||||||
# dump data in handler format for aligning the interface
|
|
||||||
handler = DataHandlerLP(
|
|
||||||
data_loader={
|
|
||||||
"class": "qlib.data.dataset.loader.StaticDataLoader",
|
|
||||||
"kwargs": {"config": DIRNAME / "fea_label_df.pkl"},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
handler.to_pickle(DIRNAME / self.proxy_hd, dump_all=True)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def _internal_data_path(self):
|
|
||||||
return DIRNAME / f"internal_data_s{self.step}.pkl"
|
|
||||||
|
|
||||||
def dump_meta_ipt(self):
|
|
||||||
"""
|
|
||||||
Dump data for training meta model.
|
|
||||||
This function will dump the input data for meta model
|
|
||||||
"""
|
|
||||||
# According to the experiments, the choice of the model type is very important for achieving good results
|
|
||||||
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
|
|
||||||
sim_task = rb.basic_task()
|
|
||||||
|
|
||||||
if self.sim_task_model == "gbdt":
|
|
||||||
sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 150})
|
|
||||||
|
|
||||||
exp_name_sim = f"data_sim_s{self.step}"
|
|
||||||
|
|
||||||
internal_data = InternalData(sim_task, self.step, exp_name=exp_name_sim)
|
|
||||||
internal_data.setup(trainer=TrainerR)
|
|
||||||
|
|
||||||
with self._internal_data_path.open("wb") as f:
|
|
||||||
pickle.dump(internal_data, f)
|
|
||||||
|
|
||||||
def train_meta_model(self, fill_method="max"):
|
|
||||||
"""
|
|
||||||
training a meta model based on a simplified linear proxy model;
|
|
||||||
"""
|
|
||||||
|
|
||||||
# 1) leverage the simplified proxy forecasting model to train meta model.
|
|
||||||
# - Only the dataset part is important, in current version of meta model will integrate the
|
|
||||||
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
|
|
||||||
sim_task = rb.basic_task()
|
|
||||||
# the train_start for training meta model does not necessarily align with final rolling
|
|
||||||
train_start = "2008-01-01" if self.rb_kwargs.get("train_start") is None else self.rb_kwargs.get("train_start")
|
|
||||||
train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
|
|
||||||
test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
|
|
||||||
proxy_forecast_model_task = {
|
|
||||||
# "model": "qlib.contrib.model.linear.LinearModel",
|
|
||||||
"dataset": {
|
|
||||||
"class": "qlib.data.dataset.DatasetH",
|
|
||||||
"kwargs": {
|
|
||||||
"handler": f"file://{(DIRNAME / self.proxy_hd).absolute()}",
|
|
||||||
"segments": {
|
|
||||||
"train": (train_start, train_end),
|
|
||||||
"test": (test_start, sim_task["dataset"]["kwargs"]["segments"]["test"][1]),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# "record": ["qlib.workflow.record_temp.SignalRecord"]
|
|
||||||
}
|
|
||||||
# the proxy_forecast_model_task will be used to create meta tasks.
|
|
||||||
# The test date of first task will be 2011-01-01. Each test segment will be about 20days
|
|
||||||
# The tasks include all training tasks and test tasks.
|
|
||||||
|
|
||||||
# 2) preparing meta dataset
|
|
||||||
kwargs = dict(
|
|
||||||
task_tpl=proxy_forecast_model_task,
|
|
||||||
step=self.step,
|
|
||||||
segments=0.62, # keep test period consistent with the dataset yaml
|
|
||||||
trunc_days=1 + self.horizon,
|
|
||||||
hist_step_n=30,
|
|
||||||
fill_method=fill_method,
|
|
||||||
rolling_ext_days=0,
|
|
||||||
)
|
|
||||||
# NOTE:
|
|
||||||
# the input of meta model (internal data) are shared between proxy model and final forecasting model
|
|
||||||
# but their task test segment are not aligned! It worked in my previous experiment.
|
|
||||||
# So the misalignment will not affect the effectiveness of the method.
|
|
||||||
with self._internal_data_path.open("rb") as f:
|
|
||||||
internal_data = pickle.load(f)
|
|
||||||
|
|
||||||
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
|
||||||
|
|
||||||
# 3) train and logging meta model
|
|
||||||
with R.start(experiment_name=self.meta_exp_name):
|
|
||||||
R.log_params(**kwargs)
|
|
||||||
mm = MetaModelDS(
|
|
||||||
step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha
|
|
||||||
)
|
|
||||||
mm.fit(md)
|
|
||||||
R.save_objects(model=mm)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def _task_path(self):
|
|
||||||
return DIRNAME / f"tasks_s{self.step}.pkl"
|
|
||||||
|
|
||||||
def meta_inference(self):
|
|
||||||
"""
|
|
||||||
Leverage meta-model for inference:
|
|
||||||
- Given
|
|
||||||
- baseline tasks
|
|
||||||
- input for meta model(internal data)
|
|
||||||
- meta model (its learnt knowledge on proxy forecasting model is expected to transfer to normal forecasting model)
|
|
||||||
"""
|
|
||||||
# 1) get meta model
|
|
||||||
exp = R.get_exp(experiment_name=self.meta_exp_name)
|
|
||||||
rec = exp.list_recorders(rtype=exp.RT_L)[0]
|
|
||||||
meta_model: MetaModelDS = rec.load_object("model")
|
|
||||||
|
|
||||||
# 2)
|
|
||||||
# we are transfer to knowledge of meta model to final forecasting tasks.
|
|
||||||
# Create MetaTaskDataset for the final forecasting tasks
|
|
||||||
# Aligning the setting of it to the MetaTaskDataset when training Meta model is necessary
|
|
||||||
|
|
||||||
# 2.1) get previous config
|
|
||||||
param = rec.list_params()
|
|
||||||
trunc_days = int(param["trunc_days"])
|
|
||||||
step = int(param["step"])
|
|
||||||
hist_step_n = int(param["hist_step_n"])
|
|
||||||
fill_method = param.get("fill_method", "max")
|
|
||||||
|
|
||||||
rb = RollingBenchmark(model_type=self.forecast_model, **self.rb_kwargs)
|
|
||||||
task_l = rb.create_rolling_tasks()
|
|
||||||
|
|
||||||
# 2.2) create meta dataset for final dataset
|
|
||||||
kwargs = dict(
|
|
||||||
task_tpl=task_l,
|
|
||||||
step=step,
|
|
||||||
segments=0.0, # all the tasks are for testing
|
|
||||||
trunc_days=trunc_days,
|
|
||||||
hist_step_n=hist_step_n,
|
|
||||||
fill_method=fill_method,
|
|
||||||
task_mode=MetaTask.PROC_MODE_TRANSFER,
|
|
||||||
)
|
|
||||||
|
|
||||||
with self._internal_data_path.open("rb") as f:
|
|
||||||
internal_data = pickle.load(f)
|
|
||||||
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
|
||||||
|
|
||||||
# 3) meta model make inference and get new qlib task
|
|
||||||
new_tasks = meta_model.inference(mds)
|
|
||||||
with self._task_path.open("wb") as f:
|
|
||||||
pickle.dump(new_tasks, f)
|
|
||||||
|
|
||||||
def train_and_eval_tasks(self):
|
|
||||||
"""
|
|
||||||
Training the tasks generated by meta model
|
|
||||||
Then evaluate it
|
|
||||||
"""
|
|
||||||
with self._task_path.open("rb") as f:
|
|
||||||
tasks = pickle.load(f)
|
|
||||||
rb = RollingBenchmark(rolling_exp="rolling_ds", model_type=self.forecast_model, **self.rb_kwargs)
|
|
||||||
rb.train_rolling_tasks(tasks)
|
|
||||||
rb.ens_rolling()
|
|
||||||
rb.update_rolling_rec()
|
|
||||||
|
|
||||||
def run_all(self):
|
|
||||||
# 1) file: handler_proxy.pkl (self.proxy_hd)
|
|
||||||
self.dump_data_for_proxy_model()
|
|
||||||
# 2)
|
|
||||||
# file: internal_data_s20.pkl
|
|
||||||
# mlflow: data_sim_s20, models for calculating meta_ipt
|
|
||||||
self.dump_meta_ipt()
|
|
||||||
# 3) meta model will be stored in `DDG-DA`
|
|
||||||
self.train_meta_model()
|
|
||||||
# 4) new_tasks are saved in "tasks_s20.pkl" (reweighter is added)
|
|
||||||
self.meta_inference()
|
|
||||||
# 5) load the saved tasks and train model
|
|
||||||
self.train_and_eval_tasks()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
GetData().qlib_data(exists_skip=True)
|
GetData().qlib_data(exists_skip=True)
|
||||||
auto_init()
|
auto_init()
|
||||||
fire.Fire(DDGDA)
|
fire.Fire(DDGDABench)
|
||||||
|
|
|
@ -5,11 +5,12 @@ This is the framework of periodically Rolling Retrain (RR) forecasting models. R
|
||||||
## Run the Code
|
## Run the Code
|
||||||
Users can try RR by running the following command:
|
Users can try RR by running the following command:
|
||||||
```bash
|
```bash
|
||||||
python rolling_benchmark.py run_all
|
python rolling_benchmark.py run
|
||||||
```
|
```
|
||||||
|
|
||||||
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `model_type` parameter.
|
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `model_type` parameter.
|
||||||
For example, users can try `LightGBM` forecasting models by running the following command:
|
For example, users can try `LightGBM` forecasting models by running the following command:
|
||||||
```bash
|
```bash
|
||||||
python rolling_benchmark.py --model_type="gbdt" run_all
|
python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run
|
||||||
```
|
|
||||||
|
```
|
||||||
|
|
|
@ -1,161 +1,33 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
# Copyright (c) Microsoft Corporation.
|
||||||
# Licensed under the MIT License.
|
# Licensed under the MIT License.
|
||||||
from typing import Optional
|
|
||||||
from qlib.model.ens.ensemble import RollingEnsemble
|
|
||||||
from qlib.utils import init_instance_by_config
|
|
||||||
import fire
|
|
||||||
import yaml
|
|
||||||
import pandas as pd
|
|
||||||
from qlib import auto_init
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from tqdm.auto import tqdm
|
from typing import Union
|
||||||
from qlib.model.trainer import TrainerR
|
|
||||||
from qlib.log import get_module_logger
|
import fire
|
||||||
from qlib.utils.data import update_config
|
|
||||||
from qlib.workflow import R
|
from qlib import auto_init
|
||||||
|
from qlib.contrib.rolling.base import Rolling
|
||||||
from qlib.tests.data import GetData
|
from qlib.tests.data import GetData
|
||||||
|
|
||||||
DIRNAME = Path(__file__).absolute().resolve().parent
|
DIRNAME = Path(__file__).absolute().resolve().parent
|
||||||
from qlib.workflow.task.gen import task_generator, RollingGen
|
|
||||||
from qlib.workflow.task.collect import RecorderCollector
|
|
||||||
from qlib.workflow.record_temp import PortAnaRecord, SigAnaRecord
|
|
||||||
|
|
||||||
|
|
||||||
class RollingBenchmark:
|
class RollingBenchmark(Rolling):
|
||||||
"""
|
# The config in the README.md
|
||||||
**NOTE**
|
CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"]
|
||||||
before running the example, please clean your previous results with following command
|
|
||||||
- `rm -r mlruns`
|
|
||||||
|
|
||||||
"""
|
DEFAULT_CONF = CONF_LIST[0]
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
|
||||||
self,
|
# This code is for being compatible with the previous old code
|
||||||
rolling_exp: str = "rolling_models",
|
conf_path = Path(conf_path)
|
||||||
model_type: str = "linear",
|
super().__init__(conf_path=conf_path, horizon=horizon, **kwargs)
|
||||||
h_path: Optional[str] = None,
|
|
||||||
train_start: Optional[str] = None,
|
|
||||||
test_end: Optional[str] = None,
|
|
||||||
task_ext_conf: Optional[dict] = None,
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
rolling_exp : str
|
|
||||||
The name for the experiments for rolling
|
|
||||||
model_type : str
|
|
||||||
The model to be boosted.
|
|
||||||
h_path : Optional[str]
|
|
||||||
the dumped data handler;
|
|
||||||
test_end : Optional[str]
|
|
||||||
the test end for the data. It is typically used together with the handler
|
|
||||||
train_start : Optional[str]
|
|
||||||
the train start for the data. It is typically used together with the handler.
|
|
||||||
task_ext_conf : Optional[dict]
|
|
||||||
some option to update the
|
|
||||||
"""
|
|
||||||
self.step = 20
|
|
||||||
self.horizon = 20
|
|
||||||
self.rolling_exp = rolling_exp
|
|
||||||
self.model_type = model_type
|
|
||||||
self.h_path = h_path
|
|
||||||
self.train_start = train_start
|
|
||||||
self.test_end = test_end
|
|
||||||
self.logger = get_module_logger("RollingBenchmark")
|
|
||||||
self.task_ext_conf = task_ext_conf
|
|
||||||
|
|
||||||
def basic_task(self):
|
for f in self.CONF_LIST:
|
||||||
"""For fast training rolling"""
|
if conf_path.samefile(f):
|
||||||
if self.model_type == "gbdt":
|
break
|
||||||
conf_path = DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"
|
|
||||||
# dump the processed data on to disk for later loading to speed up the processing
|
|
||||||
h_path = DIRNAME / "lightgbm_alpha158_handler_horizon{}.pkl".format(self.horizon)
|
|
||||||
elif self.model_type == "linear":
|
|
||||||
# We use ridge regression to stabilize the performance
|
|
||||||
conf_path = DIRNAME / "workflow_config_linear_Alpha158.yaml"
|
|
||||||
h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon)
|
|
||||||
else:
|
else:
|
||||||
raise AssertionError("Model type is not supported!")
|
self.logger.warning("Model type is not in the benchmark!")
|
||||||
|
|
||||||
if self.h_path is not None:
|
|
||||||
h_path = Path(self.h_path)
|
|
||||||
|
|
||||||
with conf_path.open("r") as f:
|
|
||||||
conf = yaml.safe_load(f)
|
|
||||||
|
|
||||||
# modify dataset horizon
|
|
||||||
conf["task"]["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
|
|
||||||
"Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
|
|
||||||
]
|
|
||||||
|
|
||||||
task = conf["task"]
|
|
||||||
|
|
||||||
if self.task_ext_conf is not None:
|
|
||||||
task = update_config(task, self.task_ext_conf)
|
|
||||||
|
|
||||||
if not h_path.exists():
|
|
||||||
h_conf = task["dataset"]["kwargs"]["handler"]
|
|
||||||
h = init_instance_by_config(h_conf)
|
|
||||||
h.to_pickle(h_path, dump_all=True)
|
|
||||||
|
|
||||||
task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
|
|
||||||
task["record"] = ["qlib.workflow.record_temp.SignalRecord"]
|
|
||||||
|
|
||||||
if self.train_start is not None:
|
|
||||||
seg = task["dataset"]["kwargs"]["segments"]["train"]
|
|
||||||
task["dataset"]["kwargs"]["segments"]["train"] = pd.Timestamp(self.train_start), seg[1]
|
|
||||||
|
|
||||||
if self.test_end is not None:
|
|
||||||
seg = task["dataset"]["kwargs"]["segments"]["test"]
|
|
||||||
task["dataset"]["kwargs"]["segments"]["test"] = seg[0], pd.Timestamp(self.test_end)
|
|
||||||
self.logger.info(task)
|
|
||||||
return task
|
|
||||||
|
|
||||||
def create_rolling_tasks(self):
|
|
||||||
task = self.basic_task()
|
|
||||||
task_l = task_generator(
|
|
||||||
task, RollingGen(step=self.step, trunc_days=self.horizon + 1)
|
|
||||||
) # the last two days should be truncated to avoid information leakage
|
|
||||||
return task_l
|
|
||||||
|
|
||||||
def train_rolling_tasks(self, task_l=None):
|
|
||||||
if task_l is None:
|
|
||||||
task_l = self.create_rolling_tasks()
|
|
||||||
trainer = TrainerR(experiment_name=self.rolling_exp)
|
|
||||||
trainer(task_l)
|
|
||||||
|
|
||||||
COMB_EXP = "rolling"
|
|
||||||
|
|
||||||
def ens_rolling(self):
|
|
||||||
rc = RecorderCollector(
|
|
||||||
experiment=self.rolling_exp,
|
|
||||||
artifacts_key=["pred", "label"],
|
|
||||||
process_list=[RollingEnsemble()],
|
|
||||||
# rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
|
|
||||||
artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
|
|
||||||
)
|
|
||||||
res = rc()
|
|
||||||
with R.start(experiment_name=self.COMB_EXP):
|
|
||||||
R.log_params(exp_name=self.rolling_exp)
|
|
||||||
R.save_objects(**{"pred.pkl": res["pred"], "label.pkl": res["label"]})
|
|
||||||
|
|
||||||
def update_rolling_rec(self):
|
|
||||||
"""
|
|
||||||
Evaluate the combined rolling results
|
|
||||||
"""
|
|
||||||
for _, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
|
|
||||||
for rt_cls in SigAnaRecord, PortAnaRecord:
|
|
||||||
rt = rt_cls(recorder=rec, skip_existing=True)
|
|
||||||
rt.generate()
|
|
||||||
print(f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`.")
|
|
||||||
|
|
||||||
def run_all(self):
|
|
||||||
# the results will be save in mlruns.
|
|
||||||
# 1) each rolling task is saved in rolling_models
|
|
||||||
self.train_rolling_tasks()
|
|
||||||
# 2) combined rolling tasks and evaluation results are saved in rolling
|
|
||||||
self.ens_rolling()
|
|
||||||
self.update_rolling_rec()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
model: <MODEL>
|
signal: <PRED>
|
||||||
dataset: <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
|
||||||
class: TopkDropoutStrategy
|
class: TopkDropoutStrategy
|
||||||
module_path: qlib.contrib.strategy
|
module_path: qlib.contrib.strategy
|
||||||
kwargs:
|
kwargs:
|
||||||
signal:
|
signal: <PRED>
|
||||||
- <MODEL>
|
|
||||||
- <DATASET>
|
|
||||||
topk: 50
|
topk: 50
|
||||||
n_drop: 5
|
n_drop: 5
|
||||||
backtest:
|
backtest:
|
||||||
|
|
|
@ -14,7 +14,6 @@ class HighFreqHandler(DataHandlerLP):
|
||||||
fit_end_time=None,
|
fit_end_time=None,
|
||||||
drop_raw=True,
|
drop_raw=True,
|
||||||
):
|
):
|
||||||
|
|
||||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,6 @@ from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Se
|
||||||
|
|
||||||
|
|
||||||
class HighfreqWorkflow:
|
class HighfreqWorkflow:
|
||||||
|
|
||||||
SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}
|
SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}
|
||||||
|
|
||||||
MARKET = "all"
|
MARKET = "all"
|
||||||
|
|
|
@ -35,7 +35,6 @@ def objective(trial):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
provider_uri = "~/.qlib/qlib_data/cn_data"
|
provider_uri = "~/.qlib/qlib_data/cn_data"
|
||||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||||
qlib.init(provider_uri=provider_uri, region="cn")
|
qlib.init(provider_uri=provider_uri, region="cn")
|
||||||
|
|
|
@ -38,7 +38,6 @@ def objective(trial):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
provider_uri = "~/.qlib/qlib_data/cn_data"
|
provider_uri = "~/.qlib/qlib_data/cn_data"
|
||||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||||
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
||||||
|
|
|
@ -11,7 +11,6 @@ from qlib.tests.config import CSI300_GBDT_TASK
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
# use default data
|
# use default data
|
||||||
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
||||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||||
|
|
|
@ -9,7 +9,6 @@ from qlib.model.riskmodel import StructuredCovEstimator
|
||||||
|
|
||||||
|
|
||||||
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
|
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
|
||||||
|
|
||||||
universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
|
universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
|
||||||
|
|
||||||
price_all = (
|
price_all = (
|
||||||
|
@ -20,7 +19,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
|
||||||
riskmodel = StructuredCovEstimator()
|
riskmodel = StructuredCovEstimator()
|
||||||
|
|
||||||
for i in range(T - 1, len(price_all)):
|
for i in range(T - 1, len(price_all)):
|
||||||
|
|
||||||
date = price_all.index[i]
|
date = price_all.index[i]
|
||||||
ref_date = price_all.index[i - T + 1]
|
ref_date = price_all.index[i - T + 1]
|
||||||
|
|
||||||
|
@ -47,7 +45,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
import qlib
|
import qlib
|
||||||
|
|
||||||
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")
|
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")
|
||||||
|
|
|
@ -13,7 +13,6 @@ from qlib.tests.data import GetData
|
||||||
|
|
||||||
|
|
||||||
class RollingDataWorkflow:
|
class RollingDataWorkflow:
|
||||||
|
|
||||||
MARKET = "csi300"
|
MARKET = "csi300"
|
||||||
start_time = "2010-01-01"
|
start_time = "2010-01-01"
|
||||||
end_time = "2019-12-31"
|
end_time = "2019-12-31"
|
||||||
|
@ -93,7 +92,6 @@ class RollingDataWorkflow:
|
||||||
dataset = init_instance_by_config(dataset_config)
|
dataset = init_instance_by_config(dataset_config)
|
||||||
|
|
||||||
for rolling_offset in range(self.rolling_cnt):
|
for rolling_offset in range(self.rolling_cnt):
|
||||||
|
|
||||||
print(f"===========rolling{rolling_offset} start===========")
|
print(f"===========rolling{rolling_offset} start===========")
|
||||||
if rolling_offset:
|
if rolling_offset:
|
||||||
dataset.config(
|
dataset.config(
|
||||||
|
|
|
@ -17,7 +17,6 @@ from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
# use default data
|
# use default data
|
||||||
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
||||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||||
|
|
|
@ -77,7 +77,6 @@ def init(default_conf="client", **kwargs):
|
||||||
|
|
||||||
|
|
||||||
def _mount_nfs_uri(provider_uri, mount_path, auto_mount: bool = False):
|
def _mount_nfs_uri(provider_uri, mount_path, auto_mount: bool = False):
|
||||||
|
|
||||||
LOG = get_module_logger("mount nfs", level=logging.INFO)
|
LOG = get_module_logger("mount nfs", level=logging.INFO)
|
||||||
if mount_path is None:
|
if mount_path is None:
|
||||||
raise ValueError(f"Invalid mount path: {mount_path}!")
|
raise ValueError(f"Invalid mount path: {mount_path}!")
|
||||||
|
|
|
@ -182,7 +182,6 @@ def get_strategy_executor(
|
||||||
exchange_kwargs: dict = {},
|
exchange_kwargs: dict = {},
|
||||||
pos_type: str = "Position",
|
pos_type: str = "Position",
|
||||||
) -> Tuple[BaseStrategy, BaseExecutor]:
|
) -> Tuple[BaseStrategy, BaseExecutor]:
|
||||||
|
|
||||||
# NOTE:
|
# NOTE:
|
||||||
# - for avoiding recursive import
|
# - for avoiding recursive import
|
||||||
# - typing annotations is not reliable
|
# - typing annotations is not reliable
|
||||||
|
|
|
@ -638,7 +638,6 @@ class Exchange:
|
||||||
random.seed(0)
|
random.seed(0)
|
||||||
random.shuffle(sorted_ids)
|
random.shuffle(sorted_ids)
|
||||||
for stock_id in sorted_ids:
|
for stock_id in sorted_ids:
|
||||||
|
|
||||||
# Do not generate order for the non-tradable stocks
|
# Do not generate order for the non-tradable stocks
|
||||||
if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
|
if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -293,7 +293,6 @@ class QlibConfig(Config):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, provider_uri: Union[str, Path, dict], mount_path: Union[str, Path, dict]):
|
def __init__(self, provider_uri: Union[str, Path, dict], mount_path: Union[str, Path, dict]):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
The relation of `provider_uri` and `mount_path`
|
The relation of `provider_uri` and `mount_path`
|
||||||
- `mount_path` is used only if provider_uri is an NFS path
|
- `mount_path` is used only if provider_uri is an NFS path
|
||||||
|
|
|
@ -130,7 +130,6 @@ class MTSDatasetH(DatasetH):
|
||||||
input_size=None,
|
input_size=None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
|
||||||
assert num_states == 0 or horizon > 0, "please specify `horizon` to avoid data leakage"
|
assert num_states == 0 or horizon > 0, "please specify `horizon` to avoid data leakage"
|
||||||
assert memory_mode in ["sample", "daily"], "unsupported memory mode"
|
assert memory_mode in ["sample", "daily"], "unsupported memory mode"
|
||||||
assert memory_mode == "sample" or batch_size < 0, "daily memory requires daily sampling (`batch_size < 0`)"
|
assert memory_mode == "sample" or batch_size < 0, "daily memory requires daily sampling (`batch_size < 0`)"
|
||||||
|
@ -153,7 +152,6 @@ class MTSDatasetH(DatasetH):
|
||||||
super().__init__(handler, segments, **kwargs)
|
super().__init__(handler, segments, **kwargs)
|
||||||
|
|
||||||
def setup_data(self, handler_kwargs: dict = None, **kwargs):
|
def setup_data(self, handler_kwargs: dict = None, **kwargs):
|
||||||
|
|
||||||
super().setup_data(**kwargs)
|
super().setup_data(**kwargs)
|
||||||
|
|
||||||
if handler_kwargs is not None:
|
if handler_kwargs is not None:
|
||||||
|
@ -288,7 +286,6 @@ class MTSDatasetH(DatasetH):
|
||||||
daily_count = [] # store number of samples for each day
|
daily_count = [] # store number of samples for each day
|
||||||
|
|
||||||
for j in indices[i : i + batch_size]:
|
for j in indices[i : i + batch_size]:
|
||||||
|
|
||||||
# normal sampling: self.batch_size > 0 => slices is a list => slices_subset is a slice
|
# normal sampling: self.batch_size > 0 => slices is a list => slices_subset is a slice
|
||||||
# daily sampling: self.batch_size < 0 => slices is a nested list => slices_subset is a list
|
# daily sampling: self.batch_size < 0 => slices is a nested list => slices_subset is a list
|
||||||
slices_subset = slices[j]
|
slices_subset = slices[j]
|
||||||
|
@ -297,7 +294,6 @@ class MTSDatasetH(DatasetH):
|
||||||
# each slices_subset contains a list of slices for multiple stocks
|
# each slices_subset contains a list of slices for multiple stocks
|
||||||
# NOTE: daily sampling is used in 1) eval mode, 2) train mode with self.batch_size < 0
|
# NOTE: daily sampling is used in 1) eval mode, 2) train mode with self.batch_size < 0
|
||||||
if self.batch_size < 0:
|
if self.batch_size < 0:
|
||||||
|
|
||||||
# store daily index
|
# store daily index
|
||||||
idx = self._daily_index.index[j] # daily_index.index is the index of the original data
|
idx = self._daily_index.index[j] # daily_index.index is the index of the original data
|
||||||
daily_index.append(idx)
|
daily_index.append(idx)
|
||||||
|
@ -320,7 +316,6 @@ class MTSDatasetH(DatasetH):
|
||||||
slices_subset = [slices_subset]
|
slices_subset = [slices_subset]
|
||||||
|
|
||||||
for slc in slices_subset:
|
for slc in slices_subset:
|
||||||
|
|
||||||
# legacy support for Alpha360 data by `input_size`
|
# legacy support for Alpha360 data by `input_size`
|
||||||
if self.input_size:
|
if self.input_size:
|
||||||
data.append(self._data[slc.stop - 1].reshape(self.input_size, -1).T)
|
data.append(self._data[slc.stop - 1].reshape(self.input_size, -1).T)
|
||||||
|
|
|
@ -17,7 +17,6 @@ class HighFreqHandler(DataHandlerLP):
|
||||||
fit_end_time=None,
|
fit_end_time=None,
|
||||||
drop_raw=True,
|
drop_raw=True,
|
||||||
):
|
):
|
||||||
|
|
||||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||||
|
|
||||||
|
@ -318,7 +317,6 @@ class HighFreqOrderHandler(DataHandlerLP):
|
||||||
inst_processors=None,
|
inst_processors=None,
|
||||||
drop_raw=True,
|
drop_raw=True,
|
||||||
):
|
):
|
||||||
|
|
||||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,6 @@ class HighFreqNorm(Processor):
|
||||||
feature_save_dir: str,
|
feature_save_dir: str,
|
||||||
norm_groups: Dict[str, int],
|
norm_groups: Dict[str, int],
|
||||||
):
|
):
|
||||||
|
|
||||||
self.fit_start_time = fit_start_time
|
self.fit_start_time = fit_start_time
|
||||||
self.fit_end_time = fit_end_time
|
self.fit_end_time = fit_end_time
|
||||||
self.feature_save_dir = feature_save_dir
|
self.feature_save_dir = feature_save_dir
|
||||||
|
|
|
@ -49,6 +49,8 @@ class InternalData:
|
||||||
|
|
||||||
# 1) prepare the prediction of proxy models
|
# 1) prepare the prediction of proxy models
|
||||||
perf_task_tpl = deepcopy(self.task_tpl) # this task is supposed to contains no complicated objects
|
perf_task_tpl = deepcopy(self.task_tpl) # this task is supposed to contains no complicated objects
|
||||||
|
# The only thing we want to save is the prediction
|
||||||
|
perf_task_tpl["record"] = ["qlib.workflow.record_temp.SignalRecord"]
|
||||||
|
|
||||||
trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name, **trainer_kwargs)
|
trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name, **trainer_kwargs)
|
||||||
# NOTE:
|
# NOTE:
|
||||||
|
|
|
@ -246,7 +246,6 @@ class ADARNN(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
df_train, df_valid = dataset.prepare(
|
df_train, df_valid = dataset.prepare(
|
||||||
["train", "valid"],
|
["train", "valid"],
|
||||||
col_set=["feature", "label"],
|
col_set=["feature", "label"],
|
||||||
|
@ -318,7 +317,6 @@ class ADARNN(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for begin in range(sample_num)[:: self.batch_size]:
|
for begin in range(sample_num)[:: self.batch_size]:
|
||||||
|
|
||||||
if sample_num - begin < self.batch_size:
|
if sample_num - begin < self.batch_size:
|
||||||
end = sample_num
|
end = sample_num
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -146,7 +146,6 @@ class ALSTM(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -155,7 +154,6 @@ class ALSTM(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, x_train, y_train):
|
def train_epoch(self, x_train, y_train):
|
||||||
|
|
||||||
x_train_values = x_train.values
|
x_train_values = x_train.values
|
||||||
y_train_values = np.squeeze(y_train.values)
|
y_train_values = np.squeeze(y_train.values)
|
||||||
|
|
||||||
|
@ -165,7 +163,6 @@ class ALSTM(Model):
|
||||||
np.random.shuffle(indices)
|
np.random.shuffle(indices)
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -181,7 +178,6 @@ class ALSTM(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_x, data_y):
|
def test_epoch(self, data_x, data_y):
|
||||||
|
|
||||||
# prepare training data
|
# prepare training data
|
||||||
x_values = data_x.values
|
x_values = data_x.values
|
||||||
y_values = np.squeeze(data_y.values)
|
y_values = np.squeeze(data_y.values)
|
||||||
|
@ -194,7 +190,6 @@ class ALSTM(Model):
|
||||||
indices = np.arange(len(x_values))
|
indices = np.arange(len(x_values))
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -217,7 +212,6 @@ class ALSTM(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
df_train, df_valid, df_test = dataset.prepare(
|
df_train, df_valid, df_test = dataset.prepare(
|
||||||
["train", "valid", "test"],
|
["train", "valid", "test"],
|
||||||
col_set=["feature", "label"],
|
col_set=["feature", "label"],
|
||||||
|
@ -282,7 +276,6 @@ class ALSTM(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for begin in range(sample_num)[:: self.batch_size]:
|
for begin in range(sample_num)[:: self.batch_size]:
|
||||||
|
|
||||||
if sample_num - begin < self.batch_size:
|
if sample_num - begin < self.batch_size:
|
||||||
end = sample_num
|
end = sample_num
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -156,7 +156,6 @@ class ALSTM(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -165,10 +164,9 @@ class ALSTM(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, data_loader):
|
def train_epoch(self, data_loader):
|
||||||
|
|
||||||
self.ALSTM_model.train()
|
self.ALSTM_model.train()
|
||||||
|
|
||||||
for (data, weight) in data_loader:
|
for data, weight in data_loader:
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
|
||||||
|
@ -181,14 +179,12 @@ class ALSTM(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_loader):
|
def test_epoch(self, data_loader):
|
||||||
|
|
||||||
self.ALSTM_model.eval()
|
self.ALSTM_model.eval()
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
losses = []
|
losses = []
|
||||||
|
|
||||||
for (data, weight) in data_loader:
|
for data, weight in data_loader:
|
||||||
|
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
# feature[torch.isnan(feature)] = 0
|
# feature[torch.isnan(feature)] = 0
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
@ -295,7 +291,6 @@ class ALSTM(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for data in test_loader:
|
for data in test_loader:
|
||||||
|
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
|
|
|
@ -154,7 +154,6 @@ class GATs(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -175,7 +174,6 @@ class GATs(Model):
|
||||||
return daily_index, daily_count
|
return daily_index, daily_count
|
||||||
|
|
||||||
def train_epoch(self, x_train, y_train):
|
def train_epoch(self, x_train, y_train):
|
||||||
|
|
||||||
x_train_values = x_train.values
|
x_train_values = x_train.values
|
||||||
y_train_values = np.squeeze(y_train.values)
|
y_train_values = np.squeeze(y_train.values)
|
||||||
self.GAT_model.train()
|
self.GAT_model.train()
|
||||||
|
@ -197,7 +195,6 @@ class GATs(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_x, data_y):
|
def test_epoch(self, data_x, data_y):
|
||||||
|
|
||||||
# prepare training data
|
# prepare training data
|
||||||
x_values = data_x.values
|
x_values = data_x.values
|
||||||
y_values = np.squeeze(data_y.values)
|
y_values = np.squeeze(data_y.values)
|
||||||
|
@ -230,7 +227,6 @@ class GATs(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
df_train, df_valid, df_test = dataset.prepare(
|
df_train, df_valid, df_test = dataset.prepare(
|
||||||
["train", "valid", "test"],
|
["train", "valid", "test"],
|
||||||
col_set=["feature", "label"],
|
col_set=["feature", "label"],
|
||||||
|
|
|
@ -32,7 +32,6 @@ class DailyBatchSampler(Sampler):
|
||||||
self.daily_index[0] = 0
|
self.daily_index[0] = 0
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
|
|
||||||
for idx, count in zip(self.daily_index, self.daily_count):
|
for idx, count in zip(self.daily_index, self.daily_count):
|
||||||
yield np.arange(idx, idx + count)
|
yield np.arange(idx, idx + count)
|
||||||
|
|
||||||
|
@ -173,7 +172,6 @@ class GATs(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -194,11 +192,9 @@ class GATs(Model):
|
||||||
return daily_index, daily_count
|
return daily_index, daily_count
|
||||||
|
|
||||||
def train_epoch(self, data_loader):
|
def train_epoch(self, data_loader):
|
||||||
|
|
||||||
self.GAT_model.train()
|
self.GAT_model.train()
|
||||||
|
|
||||||
for data in data_loader:
|
for data in data_loader:
|
||||||
|
|
||||||
data = data.squeeze()
|
data = data.squeeze()
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
@ -212,14 +208,12 @@ class GATs(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_loader):
|
def test_epoch(self, data_loader):
|
||||||
|
|
||||||
self.GAT_model.eval()
|
self.GAT_model.eval()
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
losses = []
|
losses = []
|
||||||
|
|
||||||
for data in data_loader:
|
for data in data_loader:
|
||||||
|
|
||||||
data = data.squeeze()
|
data = data.squeeze()
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
# feature[torch.isnan(feature)] = 0
|
# feature[torch.isnan(feature)] = 0
|
||||||
|
@ -240,7 +234,6 @@ class GATs(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
||||||
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
||||||
if dl_train.empty or dl_valid.empty:
|
if dl_train.empty or dl_valid.empty:
|
||||||
|
@ -329,7 +322,6 @@ class GATs(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for data in test_loader:
|
for data in test_loader:
|
||||||
|
|
||||||
data = data.squeeze()
|
data = data.squeeze()
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
|
|
||||||
|
|
|
@ -146,7 +146,6 @@ class GRU(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -155,7 +154,6 @@ class GRU(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, x_train, y_train):
|
def train_epoch(self, x_train, y_train):
|
||||||
|
|
||||||
x_train_values = x_train.values
|
x_train_values = x_train.values
|
||||||
y_train_values = np.squeeze(y_train.values)
|
y_train_values = np.squeeze(y_train.values)
|
||||||
|
|
||||||
|
@ -165,7 +163,6 @@ class GRU(Model):
|
||||||
np.random.shuffle(indices)
|
np.random.shuffle(indices)
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -181,7 +178,6 @@ class GRU(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_x, data_y):
|
def test_epoch(self, data_x, data_y):
|
||||||
|
|
||||||
# prepare training data
|
# prepare training data
|
||||||
x_values = data_x.values
|
x_values = data_x.values
|
||||||
y_values = np.squeeze(data_y.values)
|
y_values = np.squeeze(data_y.values)
|
||||||
|
@ -194,7 +190,6 @@ class GRU(Model):
|
||||||
indices = np.arange(len(x_values))
|
indices = np.arange(len(x_values))
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -217,7 +212,6 @@ class GRU(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
df_train, df_valid, df_test = dataset.prepare(
|
df_train, df_valid, df_test = dataset.prepare(
|
||||||
["train", "valid", "test"],
|
["train", "valid", "test"],
|
||||||
col_set=["feature", "label"],
|
col_set=["feature", "label"],
|
||||||
|
@ -282,7 +276,6 @@ class GRU(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for begin in range(sample_num)[:: self.batch_size]:
|
for begin in range(sample_num)[:: self.batch_size]:
|
||||||
|
|
||||||
if sample_num - begin < self.batch_size:
|
if sample_num - begin < self.batch_size:
|
||||||
end = sample_num
|
end = sample_num
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -154,7 +154,6 @@ class GRU(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -163,10 +162,9 @@ class GRU(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, data_loader):
|
def train_epoch(self, data_loader):
|
||||||
|
|
||||||
self.GRU_model.train()
|
self.GRU_model.train()
|
||||||
|
|
||||||
for (data, weight) in data_loader:
|
for data, weight in data_loader:
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
|
||||||
|
@ -179,14 +177,12 @@ class GRU(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_loader):
|
def test_epoch(self, data_loader):
|
||||||
|
|
||||||
self.GRU_model.eval()
|
self.GRU_model.eval()
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
losses = []
|
losses = []
|
||||||
|
|
||||||
for (data, weight) in data_loader:
|
for data, weight in data_loader:
|
||||||
|
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
# feature[torch.isnan(feature)] = 0
|
# feature[torch.isnan(feature)] = 0
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
@ -293,7 +289,6 @@ class GRU(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for data in test_loader:
|
for data in test_loader:
|
||||||
|
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
|
|
|
@ -160,7 +160,6 @@ class HIST(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric == "ic":
|
if self.metric == "ic":
|
||||||
|
@ -189,7 +188,6 @@ class HIST(Model):
|
||||||
return daily_index, daily_count
|
return daily_index, daily_count
|
||||||
|
|
||||||
def train_epoch(self, x_train, y_train, stock_index):
|
def train_epoch(self, x_train, y_train, stock_index):
|
||||||
|
|
||||||
stock2concept_matrix = np.load(self.stock2concept)
|
stock2concept_matrix = np.load(self.stock2concept)
|
||||||
x_train_values = x_train.values
|
x_train_values = x_train.values
|
||||||
y_train_values = np.squeeze(y_train.values)
|
y_train_values = np.squeeze(y_train.values)
|
||||||
|
@ -214,7 +212,6 @@ class HIST(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_x, data_y, stock_index):
|
def test_epoch(self, data_x, data_y, stock_index):
|
||||||
|
|
||||||
# prepare training data
|
# prepare training data
|
||||||
stock2concept_matrix = np.load(self.stock2concept)
|
stock2concept_matrix = np.load(self.stock2concept)
|
||||||
x_values = data_x.values
|
x_values = data_x.values
|
||||||
|
|
|
@ -153,7 +153,6 @@ class IGMTF(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric == "ic":
|
if self.metric == "ic":
|
||||||
|
@ -201,7 +200,6 @@ class IGMTF(Model):
|
||||||
return train_hidden, train_hidden_day
|
return train_hidden, train_hidden_day
|
||||||
|
|
||||||
def train_epoch(self, x_train, y_train, train_hidden, train_hidden_day):
|
def train_epoch(self, x_train, y_train, train_hidden, train_hidden_day):
|
||||||
|
|
||||||
x_train_values = x_train.values
|
x_train_values = x_train.values
|
||||||
y_train_values = np.squeeze(y_train.values)
|
y_train_values = np.squeeze(y_train.values)
|
||||||
|
|
||||||
|
@ -222,7 +220,6 @@ class IGMTF(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_x, data_y, train_hidden, train_hidden_day):
|
def test_epoch(self, data_x, data_y, train_hidden, train_hidden_day):
|
||||||
|
|
||||||
# prepare training data
|
# prepare training data
|
||||||
x_values = data_x.values
|
x_values = data_x.values
|
||||||
y_values = np.squeeze(data_y.values)
|
y_values = np.squeeze(data_y.values)
|
||||||
|
@ -254,7 +251,6 @@ class IGMTF(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
df_train, df_valid = dataset.prepare(
|
df_train, df_valid = dataset.prepare(
|
||||||
["train", "valid"],
|
["train", "valid"],
|
||||||
col_set=["feature", "label"],
|
col_set=["feature", "label"],
|
||||||
|
|
|
@ -46,7 +46,6 @@ class LocalformerModel(Model):
|
||||||
seed=None,
|
seed=None,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
|
|
||||||
# set hyper-parameters.
|
# set hyper-parameters.
|
||||||
self.d_model = d_model
|
self.d_model = d_model
|
||||||
self.dropout = dropout
|
self.dropout = dropout
|
||||||
|
@ -96,7 +95,6 @@ class LocalformerModel(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -105,7 +103,6 @@ class LocalformerModel(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, x_train, y_train):
|
def train_epoch(self, x_train, y_train):
|
||||||
|
|
||||||
x_train_values = x_train.values
|
x_train_values = x_train.values
|
||||||
y_train_values = np.squeeze(y_train.values)
|
y_train_values = np.squeeze(y_train.values)
|
||||||
|
|
||||||
|
@ -115,7 +112,6 @@ class LocalformerModel(Model):
|
||||||
np.random.shuffle(indices)
|
np.random.shuffle(indices)
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -131,7 +127,6 @@ class LocalformerModel(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_x, data_y):
|
def test_epoch(self, data_x, data_y):
|
||||||
|
|
||||||
# prepare training data
|
# prepare training data
|
||||||
x_values = data_x.values
|
x_values = data_x.values
|
||||||
y_values = np.squeeze(data_y.values)
|
y_values = np.squeeze(data_y.values)
|
||||||
|
@ -144,7 +139,6 @@ class LocalformerModel(Model):
|
||||||
indices = np.arange(len(x_values))
|
indices = np.arange(len(x_values))
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -167,7 +161,6 @@ class LocalformerModel(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
df_train, df_valid, df_test = dataset.prepare(
|
df_train, df_valid, df_test = dataset.prepare(
|
||||||
["train", "valid", "test"],
|
["train", "valid", "test"],
|
||||||
col_set=["feature", "label"],
|
col_set=["feature", "label"],
|
||||||
|
@ -232,7 +225,6 @@ class LocalformerModel(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for begin in range(sample_num)[:: self.batch_size]:
|
for begin in range(sample_num)[:: self.batch_size]:
|
||||||
|
|
||||||
if sample_num - begin < self.batch_size:
|
if sample_num - begin < self.batch_size:
|
||||||
end = sample_num
|
end = sample_num
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -44,7 +44,6 @@ class LocalformerModel(Model):
|
||||||
seed=None,
|
seed=None,
|
||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
|
|
||||||
# set hyper-parameters.
|
# set hyper-parameters.
|
||||||
self.d_model = d_model
|
self.d_model = d_model
|
||||||
self.dropout = dropout
|
self.dropout = dropout
|
||||||
|
@ -96,7 +95,6 @@ class LocalformerModel(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -105,7 +103,6 @@ class LocalformerModel(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, data_loader):
|
def train_epoch(self, data_loader):
|
||||||
|
|
||||||
self.model.train()
|
self.model.train()
|
||||||
|
|
||||||
for data in data_loader:
|
for data in data_loader:
|
||||||
|
@ -121,14 +118,12 @@ class LocalformerModel(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_loader):
|
def test_epoch(self, data_loader):
|
||||||
|
|
||||||
self.model.eval()
|
self.model.eval()
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
losses = []
|
losses = []
|
||||||
|
|
||||||
for data in data_loader:
|
for data in data_loader:
|
||||||
|
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
|
||||||
|
@ -148,7 +143,6 @@ class LocalformerModel(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
||||||
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
||||||
if dl_train.empty or dl_valid.empty:
|
if dl_train.empty or dl_valid.empty:
|
||||||
|
|
|
@ -142,7 +142,6 @@ class LSTM(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -151,7 +150,6 @@ class LSTM(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, x_train, y_train):
|
def train_epoch(self, x_train, y_train):
|
||||||
|
|
||||||
x_train_values = x_train.values
|
x_train_values = x_train.values
|
||||||
y_train_values = np.squeeze(y_train.values)
|
y_train_values = np.squeeze(y_train.values)
|
||||||
|
|
||||||
|
@ -161,7 +159,6 @@ class LSTM(Model):
|
||||||
np.random.shuffle(indices)
|
np.random.shuffle(indices)
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -177,7 +174,6 @@ class LSTM(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_x, data_y):
|
def test_epoch(self, data_x, data_y):
|
||||||
|
|
||||||
# prepare training data
|
# prepare training data
|
||||||
x_values = data_x.values
|
x_values = data_x.values
|
||||||
y_values = np.squeeze(data_y.values)
|
y_values = np.squeeze(data_y.values)
|
||||||
|
@ -190,7 +186,6 @@ class LSTM(Model):
|
||||||
indices = np.arange(len(x_values))
|
indices = np.arange(len(x_values))
|
||||||
|
|
||||||
for i in range(len(indices))[:: self.batch_size]:
|
for i in range(len(indices))[:: self.batch_size]:
|
||||||
|
|
||||||
if len(indices) - i < self.batch_size:
|
if len(indices) - i < self.batch_size:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -212,7 +207,6 @@ class LSTM(Model):
|
||||||
evals_result=dict(),
|
evals_result=dict(),
|
||||||
save_path=None,
|
save_path=None,
|
||||||
):
|
):
|
||||||
|
|
||||||
df_train, df_valid, df_test = dataset.prepare(
|
df_train, df_valid, df_test = dataset.prepare(
|
||||||
["train", "valid", "test"],
|
["train", "valid", "test"],
|
||||||
col_set=["feature", "label"],
|
col_set=["feature", "label"],
|
||||||
|
|
|
@ -150,7 +150,6 @@ class LSTM(Model):
|
||||||
raise ValueError("unknown loss `%s`" % self.loss)
|
raise ValueError("unknown loss `%s`" % self.loss)
|
||||||
|
|
||||||
def metric_fn(self, pred, label):
|
def metric_fn(self, pred, label):
|
||||||
|
|
||||||
mask = torch.isfinite(label)
|
mask = torch.isfinite(label)
|
||||||
|
|
||||||
if self.metric in ("", "loss"):
|
if self.metric in ("", "loss"):
|
||||||
|
@ -159,10 +158,9 @@ class LSTM(Model):
|
||||||
raise ValueError("unknown metric `%s`" % self.metric)
|
raise ValueError("unknown metric `%s`" % self.metric)
|
||||||
|
|
||||||
def train_epoch(self, data_loader):
|
def train_epoch(self, data_loader):
|
||||||
|
|
||||||
self.LSTM_model.train()
|
self.LSTM_model.train()
|
||||||
|
|
||||||
for (data, weight) in data_loader:
|
for data, weight in data_loader:
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
|
||||||
|
@ -175,14 +173,12 @@ class LSTM(Model):
|
||||||
self.train_optimizer.step()
|
self.train_optimizer.step()
|
||||||
|
|
||||||
def test_epoch(self, data_loader):
|
def test_epoch(self, data_loader):
|
||||||
|
|
||||||
self.LSTM_model.eval()
|
self.LSTM_model.eval()
|
||||||
|
|
||||||
scores = []
|
scores = []
|
||||||
losses = []
|
losses = []
|
||||||
|
|
||||||
for (data, weight) in data_loader:
|
for data, weight in data_loader:
|
||||||
|
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
# feature[torch.isnan(feature)] = 0
|
# feature[torch.isnan(feature)] = 0
|
||||||
label = data[:, -1, -1].to(self.device)
|
label = data[:, -1, -1].to(self.device)
|
||||||
|
@ -288,7 +284,6 @@ class LSTM(Model):
|
||||||
preds = []
|
preds = []
|
||||||
|
|
||||||
for data in test_loader:
|
for data in test_loader:
|
||||||
|
|
||||||
feature = data[:, :, 0:-1].to(self.device)
|
feature = data[:, :, 0:-1].to(self.device)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче