* Intermediate version

* Fix YAML template and successfully run rolling

* Be compatible with benchmark

* Get the same results as the previous linear model

* Black formatting

* Update black

* Update the placeholder mechanism

* Update CI

* Update CI

* Upgrade Black

* Fix CI and simplify code

* Fix CI

* Move the data processing caching mechanism into utils.

* Adjusting DDG-DA

* Organize imports

Author: you-n-g, 2023-07-14 12:16:12 +08:00 (committed by GitHub)
Parent: 8d3adf34ac
Commit: be4646b4b7
No key matching this signature was found. GPG key ID: 4AEE18F83AFDEB23
148 changed files: 1035 additions and 1028 deletions

.github/workflows/python-publish.yml (vendored, 2 changes)

@ -38,7 +38,7 @@ jobs:
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
deploy_with_manylinux:
runs-on: ubuntu-latest
steps:

.github/workflows/test_qlib_from_pip.yml (vendored, 1 change)

@ -8,6 +8,7 @@ on:
jobs:
build:
if: ${{ false }} # FIXME: temporarily disable... Due to we are rushing a feature
timeout-minutes: 120
runs-on: ${{ matrix.os }}

.github/workflows/test_qlib_from_source.yml (vendored, 3 changes)

@ -64,7 +64,10 @@ jobs:
python -m pip install -e .[dev]
- name: Lint with Black
# Python 3.7 will use a black with low level. So we use python with higher version for black check
if: (matrix.python-version != '3.7')
run: |
pip install -U black # follow the latest version of black, previous Qlib dependency will downgrade black
black . -l 120 --check --diff
- name: Make html with sphinx


@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 23.7.0
hooks:
- id: black
args: ["qlib", "-l 120"]
@ -9,4 +9,4 @@ repos:
rev: 4.0.1
hooks:
- id: flake8
args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]
args: ["--ignore=E501,F541,E266,E402,W503,E731,E203"]


@ -53,9 +53,7 @@ Below is a typical config file of ``qrun``.
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
backtest:
limit_threshold: 0.095
account: 100000000
@ -281,9 +279,7 @@ The following script is the configuration of `backtest` and the `strategy` used
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
backtest:
limit_threshold: 0.095
account: 100000000
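
These two hunks show the updated placeholder: the strategy's `signal` no longer takes the `<MODEL>`/`<DATASET>` pair but a single `<PRED>` placeholder, which the workflow is expected to resolve to the saved prediction before the strategy is instantiated (per the "Update the placeholder mechanism" commit above). A rough, self-contained sketch of that substitution idea; this is illustrative only, not qlib's actual implementation, and `fill_placeholders` is a made-up helper:

```python
# Illustrative sketch of the "<PRED>" placeholder idea (not qlib code).
import pandas as pd


def fill_placeholders(config: dict, context: dict) -> dict:
    """Recursively replace placeholder strings such as "<PRED>" with objects from `context`."""

    def _fill(node):
        if isinstance(node, dict):
            return {k: _fill(v) for k, v in node.items()}
        if isinstance(node, list):
            return [_fill(v) for v in node]
        # strings that match a context key are swapped for the real object
        return context.get(node, node) if isinstance(node, str) else node

    return _fill(config)


strategy_cfg = {
    "class": "TopkDropoutStrategy",
    "module_path": "qlib.contrib.strategy",
    "kwargs": {"signal": "<PRED>", "topk": 50, "n_drop": 5},
}
pred_df = pd.DataFrame()  # stands in for the prediction scores saved earlier in the run
filled = fill_placeholders(strategy_cfg, {"<PRED>": pred_df})
print(filled["kwargs"]["signal"] is pred_df)  # True
```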


@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -35,9 +35,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:
@ -89,4 +87,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config


@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -48,7 +48,6 @@ class Avg15minHandler(DataHandlerLP):
)
def loader_config(self):
# Results for dataset: df: pd.DataFrame
# len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
# df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16


@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -33,9 +33,7 @@ port_analysis_config: &port_analysis_config
kwargs:
topk: 50
n_drop: 5
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
backtest:
verbose: False
limit_threshold: 0.095


@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -31,9 +31,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -36,8 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -30,9 +30,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:
@ -95,4 +93,4 @@ task:
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config
config: *port_analysis_config


@ -139,7 +139,6 @@ class GenericDataFormatter(abc.ABC):
# Sanity checks first.
# Ensure only one ID and time column exist
def _check_single_column(input_type):
length = len([tup for tup in column_definition if tup[2] == input_type])
if length != 1:


@ -78,7 +78,6 @@ class ExperimentConfig:
@property
def hyperparam_iterations(self):
return 240 if self.experiment == "volatility" else 60
def make_data_formatter(self):


@ -88,7 +88,6 @@ class HyperparamOptManager:
params_file = os.path.join(self.hyperparam_folder, "params.csv")
if os.path.exists(results_file) and os.path.exists(params_file):
self.results = pd.read_csv(results_file, index_col=0)
self.saved_params = pd.read_csv(params_file, index_col=0)
@ -178,7 +177,6 @@ class HyperparamOptManager:
return parameters
for _ in range(self._max_tries):
parameters = _get_next()
name = self._get_name(parameters)


@ -475,7 +475,6 @@ class TemporalFusionTransformer:
embeddings = []
for i in range(num_categorical_variables):
embedding = tf.keras.Sequential(
[
tf.keras.layers.InputLayer([time_steps]),
@ -680,7 +679,6 @@ class TemporalFusionTransformer:
data_map = {}
for _, sliced in data.groupby(id_col):
col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}
for k in col_mappings:
@ -954,7 +952,6 @@ class TemporalFusionTransformer:
"""
with tf.variable_scope(self.name):
transformer_layer, all_inputs, attention_components = self._build_base_graph()
outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.output_size * len(self.quantiles)))(


@ -16,9 +16,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -6,7 +6,6 @@ from qlib.utils import init_instance_by_config
def main(seed, config_file="configs/config_alstm.yaml"):
# set random seed
with open(config_file) as f:
config = yaml.safe_load(f)
@ -30,7 +29,6 @@ def main(seed, config_file="configs/config_alstm.yaml"):
if __name__ == "__main__":
# set params from cmd
parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument("--seed", type=int, default=1000, help="random seed")


@ -96,7 +96,6 @@ class MTSDatasetH(DatasetH):
drop_last=False,
**kwargs,
):
assert horizon > 0, "please specify `horizon` to avoid data leakage"
self.seq_len = seq_len
@ -111,7 +110,6 @@ class MTSDatasetH(DatasetH):
super().__init__(handler, segments, **kwargs)
def setup_data(self, handler_kwargs: dict = None, **kwargs):
super().setup_data()
# change index to <code, date>


@ -45,7 +45,6 @@ class TRAModel(Model):
avg_params=True,
**kwargs,
):
np.random.seed(seed)
torch.manual_seed(seed)
@ -93,7 +92,6 @@ class TRAModel(Model):
self.global_step = -1
def train_epoch(self, data_set):
self.model.train()
self.tra.train()
@ -146,7 +144,6 @@ class TRAModel(Model):
return total_loss
def test_epoch(self, data_set, return_pred=False):
self.model.eval()
self.tra.eval()
data_set.eval()
@ -204,7 +201,6 @@ class TRAModel(Model):
return metrics, preds
def fit(self, dataset, evals_result=dict()):
train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
best_score = -1
@ -380,7 +376,6 @@ class LSTM(nn.Module):
self.output_size = hidden_size
def forward(self, x):
x = self.input_drop(x)
if self.training and self.noise_level > 0:
@ -464,7 +459,6 @@ class Transformer(nn.Module):
self.output_size = hidden_size
def forward(self, x):
x = self.input_drop(x)
if self.training and self.noise_level > 0:
@ -514,7 +508,6 @@ class TRA(nn.Module):
self.predictors = nn.Linear(input_size, num_states)
def forward(self, hidden, hist_loss):
preds = self.predictors(hidden)
if self.num_states == 1:


@ -57,9 +57,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -16,12 +16,12 @@ Though the dataset is different, the conclusion remains the same. By applying `D
# Run the Code
Users can try `DDG-DA` by running the following command:
```bash
python workflow.py run_all
python workflow.py run
```
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try `LightGBM` forecasting models by running the following command:
```bash
python workflow.py --forecast_model="gbdt" run_all
python workflow.py --conf_path=../workflow_config_lightgbm_Alpha158.yaml run
```
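
Both commands are dispatched by python-fire: `fire.Fire(DDGDABench)` (see the workflow.py diff below) maps `--conf_path=...` onto the constructor and the trailing `run` onto a method of the class, presumably provided by the `DDGDA`/`Rolling` base classes in `qlib.contrib.rolling`. A minimal sketch of that dispatch pattern; the `Demo` class here is a hypothetical stand-in, not qlib code:

```python
# Minimal illustration of the python-fire dispatch used by these examples.
import fire


class Demo:
    def __init__(self, conf_path: str = "workflow_config_linear_Alpha158.yaml"):
        # CLI flags such as --conf_path are routed to the constructor
        self.conf_path = conf_path

    def run(self):
        # the real benchmark classes run rolling training / evaluation here
        print(f"rolling with {self.conf_path}")


if __name__ == "__main__":
    # e.g. `python demo.py run`
    # or   `python demo.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run`
    fire.Fire(Demo)
```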
# Results


@ -1,305 +1,40 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from pathlib import Path
from qlib.model.meta.task import MetaTask
from qlib.contrib.meta.data_selection.model import MetaModelDS
from qlib.contrib.meta.data_selection.dataset import InternalData, MetaDatasetDS
from qlib.data.dataset.handler import DataHandlerLP
from typing import Union
import pandas as pd
import fire
import sys
import pickle
from typing import Optional
from qlib import auto_init
from qlib.model.trainer import TrainerR
from qlib.typehint import Literal
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.contrib.rolling.ddgda import DDGDA
from qlib.tests.data import GetData
DIRNAME = Path(__file__).absolute().resolve().parent
sys.path.append(str(DIRNAME.parent / "baseline"))
from rolling_benchmark import RollingBenchmark # NOTE: sys.path is changed for import RollingBenchmark
BENCH_DIR = DIRNAME.parent / "baseline"
class DDGDA:
"""
please run `python workflow.py run_all` to run the full workflow of the experiment
class DDGDABench(DDGDA):
# The config in the README.md
CONF_LIST = [
BENCH_DIR / "workflow_config_linear_Alpha158.yaml",
BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml",
]
**NOTE**
before running the example, please clean your previous results with following command
- `rm -r mlruns`
"""
DEFAULT_CONF = CONF_LIST[0] # Linear by default due to efficiency
def __init__(
self,
sim_task_model: Literal["linear", "gbdt"] = "gbdt",
forecast_model: Literal["linear", "gbdt"] = "linear",
h_path: Optional[str] = None,
test_end: Optional[str] = None,
train_start: Optional[str] = None,
meta_1st_train_end: Optional[str] = None,
task_ext_conf: Optional[dict] = None,
alpha: float = 0.01,
proxy_hd: str = "handler_proxy.pkl",
):
"""
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
# This code is for being compatible with the previous old code
conf_path = Path(conf_path)
super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs)
Parameters
----------
train_start: Optional[str]
the start datetime for data. It is used in training start time (for both tasks & meta learing)
test_end: Optional[str]
the end datetime for data. It is used in test end time
meta_1st_train_end: Optional[str]
the datetime of training end of the first meta_task
alpha: float
Setting the L2 regularization for ridge
The `alpha` is only passed to MetaModelDS (it is not passed to sim_task_model currently..)
"""
self.step = 20
# NOTE:
# the horizon must match the meaning in the base task template
self.horizon = 20
self.meta_exp_name = "DDG-DA"
self.sim_task_model = sim_task_model # The model to capture the distribution of data.
self.forecast_model = forecast_model # downstream forecasting models' type
self.rb_kwargs = {
"h_path": h_path,
"test_end": test_end,
"train_start": train_start,
"task_ext_conf": task_ext_conf,
}
self.alpha = alpha
self.meta_1st_train_end = meta_1st_train_end
self.proxy_hd = proxy_hd
def get_feature_importance(self):
# this must be lightGBM, because it needs to get the feature importance
rb = RollingBenchmark(model_type="gbdt", **self.rb_kwargs)
task = rb.basic_task()
with R.start(experiment_name="feature_importance"):
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
model.fit(dataset)
fi = model.get_feature_importance()
# Because the model use numpy instead of dataframe for training lightgbm
# So the we must use following extra steps to get the right feature importance
df = dataset.prepare(segments=slice(None), col_set="feature", data_key=DataHandlerLP.DK_R)
cols = df.columns
fi_named = {cols[int(k.split("_")[1])]: imp for k, imp in fi.to_dict().items()}
return pd.Series(fi_named)
def dump_data_for_proxy_model(self):
"""
Dump data for training meta model.
The meta model will be trained upon the proxy forecasting model.
This dataset is for the proxy forecasting model.
"""
topk = 30
fi = self.get_feature_importance()
col_selected = fi.nlargest(topk)
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
task = rb.basic_task()
dataset = init_instance_by_config(task["dataset"])
prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
feature_df = prep_ds["feature"]
label_df = prep_ds["label"]
feature_selected = feature_df.loc[:, col_selected.index]
feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
lambda df: (df - df.mean()).div(df.std())
)
feature_selected = feature_selected.fillna(0.0)
df_all = {
"label": label_df.reindex(feature_selected.index),
"feature": feature_selected,
}
df_all = pd.concat(df_all, axis=1)
df_all.to_pickle(DIRNAME / "fea_label_df.pkl")
# dump data in handler format for aligning the interface
handler = DataHandlerLP(
data_loader={
"class": "qlib.data.dataset.loader.StaticDataLoader",
"kwargs": {"config": DIRNAME / "fea_label_df.pkl"},
}
)
handler.to_pickle(DIRNAME / self.proxy_hd, dump_all=True)
@property
def _internal_data_path(self):
return DIRNAME / f"internal_data_s{self.step}.pkl"
def dump_meta_ipt(self):
"""
Dump data for training meta model.
This function will dump the input data for meta model
"""
# According to the experiments, the choice of the model type is very important for achieving good results
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
sim_task = rb.basic_task()
if self.sim_task_model == "gbdt":
sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 150})
exp_name_sim = f"data_sim_s{self.step}"
internal_data = InternalData(sim_task, self.step, exp_name=exp_name_sim)
internal_data.setup(trainer=TrainerR)
with self._internal_data_path.open("wb") as f:
pickle.dump(internal_data, f)
def train_meta_model(self, fill_method="max"):
"""
training a meta model based on a simplified linear proxy model;
"""
# 1) leverage the simplified proxy forecasting model to train meta model.
# - Only the dataset part is important, in current version of meta model will integrate the
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
sim_task = rb.basic_task()
# the train_start for training meta model does not necessarily align with final rolling
train_start = "2008-01-01" if self.rb_kwargs.get("train_start") is None else self.rb_kwargs.get("train_start")
train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
proxy_forecast_model_task = {
# "model": "qlib.contrib.model.linear.LinearModel",
"dataset": {
"class": "qlib.data.dataset.DatasetH",
"kwargs": {
"handler": f"file://{(DIRNAME / self.proxy_hd).absolute()}",
"segments": {
"train": (train_start, train_end),
"test": (test_start, sim_task["dataset"]["kwargs"]["segments"]["test"][1]),
},
},
},
# "record": ["qlib.workflow.record_temp.SignalRecord"]
}
# the proxy_forecast_model_task will be used to create meta tasks.
# The test date of first task will be 2011-01-01. Each test segment will be about 20days
# The tasks include all training tasks and test tasks.
# 2) preparing meta dataset
kwargs = dict(
task_tpl=proxy_forecast_model_task,
step=self.step,
segments=0.62, # keep test period consistent with the dataset yaml
trunc_days=1 + self.horizon,
hist_step_n=30,
fill_method=fill_method,
rolling_ext_days=0,
)
# NOTE:
# the input of meta model (internal data) are shared between proxy model and final forecasting model
# but their task test segment are not aligned! It worked in my previous experiment.
# So the misalignment will not affect the effectiveness of the method.
with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f)
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
# 3) train and logging meta model
with R.start(experiment_name=self.meta_exp_name):
R.log_params(**kwargs)
mm = MetaModelDS(
step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha
)
mm.fit(md)
R.save_objects(model=mm)
@property
def _task_path(self):
return DIRNAME / f"tasks_s{self.step}.pkl"
def meta_inference(self):
"""
Leverage meta-model for inference:
- Given
- baseline tasks
- input for meta model(internal data)
- meta model (its learnt knowledge on proxy forecasting model is expected to transfer to normal forecasting model)
"""
# 1) get meta model
exp = R.get_exp(experiment_name=self.meta_exp_name)
rec = exp.list_recorders(rtype=exp.RT_L)[0]
meta_model: MetaModelDS = rec.load_object("model")
# 2)
# we are transfer to knowledge of meta model to final forecasting tasks.
# Create MetaTaskDataset for the final forecasting tasks
# Aligning the setting of it to the MetaTaskDataset when training Meta model is necessary
# 2.1) get previous config
param = rec.list_params()
trunc_days = int(param["trunc_days"])
step = int(param["step"])
hist_step_n = int(param["hist_step_n"])
fill_method = param.get("fill_method", "max")
rb = RollingBenchmark(model_type=self.forecast_model, **self.rb_kwargs)
task_l = rb.create_rolling_tasks()
# 2.2) create meta dataset for final dataset
kwargs = dict(
task_tpl=task_l,
step=step,
segments=0.0, # all the tasks are for testing
trunc_days=trunc_days,
hist_step_n=hist_step_n,
fill_method=fill_method,
task_mode=MetaTask.PROC_MODE_TRANSFER,
)
with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f)
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
# 3) meta model make inference and get new qlib task
new_tasks = meta_model.inference(mds)
with self._task_path.open("wb") as f:
pickle.dump(new_tasks, f)
def train_and_eval_tasks(self):
"""
Training the tasks generated by meta model
Then evaluate it
"""
with self._task_path.open("rb") as f:
tasks = pickle.load(f)
rb = RollingBenchmark(rolling_exp="rolling_ds", model_type=self.forecast_model, **self.rb_kwargs)
rb.train_rolling_tasks(tasks)
rb.ens_rolling()
rb.update_rolling_rec()
def run_all(self):
# 1) file: handler_proxy.pkl (self.proxy_hd)
self.dump_data_for_proxy_model()
# 2)
# file: internal_data_s20.pkl
# mlflow: data_sim_s20, models for calculating meta_ipt
self.dump_meta_ipt()
# 3) meta model will be stored in `DDG-DA`
self.train_meta_model()
# 4) new_tasks are saved in "tasks_s20.pkl" (reweighter is added)
self.meta_inference()
# 5) load the saved tasks and train model
self.train_and_eval_tasks()
for f in self.CONF_LIST:
if conf_path.samefile(f):
break
else:
self.logger.warning("Model type is not in the benchmark!")
if __name__ == "__main__":
GetData().qlib_data(exists_skip=True)
auto_init()
fire.Fire(DDGDA)
fire.Fire(DDGDABench)
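
The hunk above interleaves the deleted legacy `DDGDA` implementation with its much shorter replacement, and the +/- markers were lost in this export. Reassembled from the added lines, the new workflow.py amounts to roughly the following; treat it as a reading aid for the diff, not an authoritative copy of the file:

```python
# Reconstruction of the new workflow.py from the added lines in the hunk above.
from pathlib import Path
from typing import Union

import fire

from qlib import auto_init
from qlib.contrib.rolling.ddgda import DDGDA
from qlib.tests.data import GetData

DIRNAME = Path(__file__).absolute().resolve().parent
BENCH_DIR = DIRNAME.parent / "baseline"


class DDGDABench(DDGDA):
    # The configs listed in the README.md
    CONF_LIST = [
        BENCH_DIR / "workflow_config_linear_Alpha158.yaml",
        BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml",
    ]

    DEFAULT_CONF = CONF_LIST[0]  # Linear by default due to efficiency

    def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
        # kept for compatibility with the previous command-line interface
        conf_path = Path(conf_path)
        super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs)

        # warn (instead of failing) when the config is not one of the benchmark configs
        for f in self.CONF_LIST:
            if conf_path.samefile(f):
                break
        else:
            self.logger.warning("Model type is not in the benchmark!")


if __name__ == "__main__":
    GetData().qlib_data(exists_skip=True)
    auto_init()
    fire.Fire(DDGDABench)
```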


@ -5,11 +5,12 @@ This is the framework of periodically Rolling Retrain (RR) forecasting models. R
## Run the Code
Users can try RR by running the following command:
```bash
python rolling_benchmark.py run_all
python rolling_benchmark.py run
```
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `model_type` parameter.
For example, users can try `LightGBM` forecasting models by running the following command:
```bash
python rolling_benchmark.py --model_type="gbdt" run_all
```
python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run
```


@ -1,161 +1,33 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
from typing import Optional
from qlib.model.ens.ensemble import RollingEnsemble
from qlib.utils import init_instance_by_config
import fire
import yaml
import pandas as pd
from qlib import auto_init
from pathlib import Path
from tqdm.auto import tqdm
from qlib.model.trainer import TrainerR
from qlib.log import get_module_logger
from qlib.utils.data import update_config
from qlib.workflow import R
from typing import Union
import fire
from qlib import auto_init
from qlib.contrib.rolling.base import Rolling
from qlib.tests.data import GetData
DIRNAME = Path(__file__).absolute().resolve().parent
from qlib.workflow.task.gen import task_generator, RollingGen
from qlib.workflow.task.collect import RecorderCollector
from qlib.workflow.record_temp import PortAnaRecord, SigAnaRecord
class RollingBenchmark:
"""
**NOTE**
before running the example, please clean your previous results with following command
- `rm -r mlruns`
class RollingBenchmark(Rolling):
# The config in the README.md
CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"]
"""
DEFAULT_CONF = CONF_LIST[0]
def __init__(
self,
rolling_exp: str = "rolling_models",
model_type: str = "linear",
h_path: Optional[str] = None,
train_start: Optional[str] = None,
test_end: Optional[str] = None,
task_ext_conf: Optional[dict] = None,
) -> None:
"""
Parameters
----------
rolling_exp : str
The name for the experiments for rolling
model_type : str
The model to be boosted.
h_path : Optional[str]
the dumped data handler;
test_end : Optional[str]
the test end for the data. It is typically used together with the handler
train_start : Optional[str]
the train start for the data. It is typically used together with the handler.
task_ext_conf : Optional[dict]
some option to update the
"""
self.step = 20
self.horizon = 20
self.rolling_exp = rolling_exp
self.model_type = model_type
self.h_path = h_path
self.train_start = train_start
self.test_end = test_end
self.logger = get_module_logger("RollingBenchmark")
self.task_ext_conf = task_ext_conf
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
# This code is for being compatible with the previous old code
conf_path = Path(conf_path)
super().__init__(conf_path=conf_path, horizon=horizon, **kwargs)
def basic_task(self):
"""For fast training rolling"""
if self.model_type == "gbdt":
conf_path = DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"
# dump the processed data on to disk for later loading to speed up the processing
h_path = DIRNAME / "lightgbm_alpha158_handler_horizon{}.pkl".format(self.horizon)
elif self.model_type == "linear":
# We use ridge regression to stabilize the performance
conf_path = DIRNAME / "workflow_config_linear_Alpha158.yaml"
h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon)
for f in self.CONF_LIST:
if conf_path.samefile(f):
break
else:
raise AssertionError("Model type is not supported!")
if self.h_path is not None:
h_path = Path(self.h_path)
with conf_path.open("r") as f:
conf = yaml.safe_load(f)
# modify dataset horizon
conf["task"]["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
"Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
]
task = conf["task"]
if self.task_ext_conf is not None:
task = update_config(task, self.task_ext_conf)
if not h_path.exists():
h_conf = task["dataset"]["kwargs"]["handler"]
h = init_instance_by_config(h_conf)
h.to_pickle(h_path, dump_all=True)
task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
task["record"] = ["qlib.workflow.record_temp.SignalRecord"]
if self.train_start is not None:
seg = task["dataset"]["kwargs"]["segments"]["train"]
task["dataset"]["kwargs"]["segments"]["train"] = pd.Timestamp(self.train_start), seg[1]
if self.test_end is not None:
seg = task["dataset"]["kwargs"]["segments"]["test"]
task["dataset"]["kwargs"]["segments"]["test"] = seg[0], pd.Timestamp(self.test_end)
self.logger.info(task)
return task
def create_rolling_tasks(self):
task = self.basic_task()
task_l = task_generator(
task, RollingGen(step=self.step, trunc_days=self.horizon + 1)
) # the last two days should be truncated to avoid information leakage
return task_l
def train_rolling_tasks(self, task_l=None):
if task_l is None:
task_l = self.create_rolling_tasks()
trainer = TrainerR(experiment_name=self.rolling_exp)
trainer(task_l)
COMB_EXP = "rolling"
def ens_rolling(self):
rc = RecorderCollector(
experiment=self.rolling_exp,
artifacts_key=["pred", "label"],
process_list=[RollingEnsemble()],
# rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
)
res = rc()
with R.start(experiment_name=self.COMB_EXP):
R.log_params(exp_name=self.rolling_exp)
R.save_objects(**{"pred.pkl": res["pred"], "label.pkl": res["label"]})
def update_rolling_rec(self):
"""
Evaluate the combined rolling results
"""
for _, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
for rt_cls in SigAnaRecord, PortAnaRecord:
rt = rt_cls(recorder=rec, skip_existing=True)
rt.generate()
print(f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`.")
def run_all(self):
# the results will be save in mlruns.
# 1) each rolling task is saved in rolling_models
self.train_rolling_tasks()
# 2) combined rolling tasks and evaluation results are saved in rolling
self.ens_rolling()
self.update_rolling_rec()
self.logger.warning("Model type is not in the benchmark!")
if __name__ == "__main__":
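
As with workflow.py, the deleted `RollingBenchmark` implementation and its replacement are interleaved above without +/- markers. Read together, the added lines give roughly the following file; the entry-point body is truncated in this excerpt, so it is left elided rather than guessed:

```python
# Reconstruction of the new rolling_benchmark.py from the added lines in the hunk above.
from pathlib import Path
from typing import Union

import fire

from qlib import auto_init
from qlib.contrib.rolling.base import Rolling
from qlib.tests.data import GetData

DIRNAME = Path(__file__).absolute().resolve().parent


class RollingBenchmark(Rolling):
    # The configs listed in the README.md
    CONF_LIST = [
        DIRNAME / "workflow_config_linear_Alpha158.yaml",
        DIRNAME / "workflow_config_lightgbm_Alpha158.yaml",
    ]

    DEFAULT_CONF = CONF_LIST[0]

    def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
        # kept for compatibility with the previous command-line interface
        conf_path = Path(conf_path)
        super().__init__(conf_path=conf_path, horizon=horizon, **kwargs)

        for f in self.CONF_LIST:
            if conf_path.samefile(f):
                break
        else:
            self.logger.warning("Model type is not in the benchmark!")


if __name__ == "__main__":
    ...  # the body of the entry point is not shown in this excerpt
```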


@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
signal: <PRED>
topk: 50
n_drop: 5
backtest:


@ -14,7 +14,6 @@ class HighFreqHandler(DataHandlerLP):
fit_end_time=None,
drop_raw=True,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)


@ -18,7 +18,6 @@ from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Se
class HighfreqWorkflow:
SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}
MARKET = "all"


@ -35,7 +35,6 @@ def objective(trial):
if __name__ == "__main__":
provider_uri = "~/.qlib/qlib_data/cn_data"
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
qlib.init(provider_uri=provider_uri, region="cn")


@ -38,7 +38,6 @@ def objective(trial):
if __name__ == "__main__":
provider_uri = "~/.qlib/qlib_data/cn_data"
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
qlib.init(provider_uri=provider_uri, region=REG_CN)


@ -11,7 +11,6 @@ from qlib.tests.config import CSI300_GBDT_TASK
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)


@ -9,7 +9,6 @@ from qlib.model.riskmodel import StructuredCovEstimator
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
price_all = (
@ -20,7 +19,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
riskmodel = StructuredCovEstimator()
for i in range(T - 1, len(price_all)):
date = price_all.index[i]
ref_date = price_all.index[i - T + 1]
@ -47,7 +45,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
if __name__ == "__main__":
import qlib
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")


@ -13,7 +13,6 @@ from qlib.tests.data import GetData
class RollingDataWorkflow:
MARKET = "csi300"
start_time = "2010-01-01"
end_time = "2019-12-31"
@ -93,7 +92,6 @@ class RollingDataWorkflow:
dataset = init_instance_by_config(dataset_config)
for rolling_offset in range(self.rolling_cnt):
print(f"===========rolling{rolling_offset} start===========")
if rolling_offset:
dataset.config(


@ -17,7 +17,6 @@ from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK
if __name__ == "__main__":
# use default data
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)


@ -77,7 +77,6 @@ def init(default_conf="client", **kwargs):
def _mount_nfs_uri(provider_uri, mount_path, auto_mount: bool = False):
LOG = get_module_logger("mount nfs", level=logging.INFO)
if mount_path is None:
raise ValueError(f"Invalid mount path: {mount_path}!")


@ -182,7 +182,6 @@ def get_strategy_executor(
exchange_kwargs: dict = {},
pos_type: str = "Position",
) -> Tuple[BaseStrategy, BaseExecutor]:
# NOTE:
# - for avoiding recursive import
# - typing annotations is not reliable


@ -638,7 +638,6 @@ class Exchange:
random.seed(0)
random.shuffle(sorted_ids)
for stock_id in sorted_ids:
# Do not generate order for the non-tradable stocks
if not self.is_stock_tradable(stock_id=stock_id, start_time=start_time, end_time=end_time):
continue


@ -293,7 +293,6 @@ class QlibConfig(Config):
"""
def __init__(self, provider_uri: Union[str, Path, dict], mount_path: Union[str, Path, dict]):
"""
The relation of `provider_uri` and `mount_path`
- `mount_path` is used only if provider_uri is an NFS path


@ -130,7 +130,6 @@ class MTSDatasetH(DatasetH):
input_size=None,
**kwargs,
):
assert num_states == 0 or horizon > 0, "please specify `horizon` to avoid data leakage"
assert memory_mode in ["sample", "daily"], "unsupported memory mode"
assert memory_mode == "sample" or batch_size < 0, "daily memory requires daily sampling (`batch_size < 0`)"
@ -153,7 +152,6 @@ class MTSDatasetH(DatasetH):
super().__init__(handler, segments, **kwargs)
def setup_data(self, handler_kwargs: dict = None, **kwargs):
super().setup_data(**kwargs)
if handler_kwargs is not None:
@ -288,7 +286,6 @@ class MTSDatasetH(DatasetH):
daily_count = [] # store number of samples for each day
for j in indices[i : i + batch_size]:
# normal sampling: self.batch_size > 0 => slices is a list => slices_subset is a slice
# daily sampling: self.batch_size < 0 => slices is a nested list => slices_subset is a list
slices_subset = slices[j]
@ -297,7 +294,6 @@ class MTSDatasetH(DatasetH):
# each slices_subset contains a list of slices for multiple stocks
# NOTE: daily sampling is used in 1) eval mode, 2) train mode with self.batch_size < 0
if self.batch_size < 0:
# store daily index
idx = self._daily_index.index[j] # daily_index.index is the index of the original data
daily_index.append(idx)
@ -320,7 +316,6 @@ class MTSDatasetH(DatasetH):
slices_subset = [slices_subset]
for slc in slices_subset:
# legacy support for Alpha360 data by `input_size`
if self.input_size:
data.append(self._data[slc.stop - 1].reshape(self.input_size, -1).T)


@ -17,7 +17,6 @@ class HighFreqHandler(DataHandlerLP):
fit_end_time=None,
drop_raw=True,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
@ -318,7 +317,6 @@ class HighFreqOrderHandler(DataHandlerLP):
inst_processors=None,
drop_raw=True,
):
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)


@ -29,7 +29,6 @@ class HighFreqNorm(Processor):
feature_save_dir: str,
norm_groups: Dict[str, int],
):
self.fit_start_time = fit_start_time
self.fit_end_time = fit_end_time
self.feature_save_dir = feature_save_dir


@ -49,6 +49,8 @@ class InternalData:
# 1) prepare the prediction of proxy models
perf_task_tpl = deepcopy(self.task_tpl) # this task is supposed to contains no complicated objects
# The only thing we want to save is the prediction
perf_task_tpl["record"] = ["qlib.workflow.record_temp.SignalRecord"]
trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name, **trainer_kwargs)
# NOTE:


@ -246,7 +246,6 @@ class ADARNN(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid = dataset.prepare(
["train", "valid"],
col_set=["feature", "label"],
@ -318,7 +317,6 @@ class ADARNN(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:


@ -146,7 +146,6 @@ class ALSTM(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -155,7 +154,6 @@ class ALSTM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@ -165,7 +163,6 @@ class ALSTM(Model):
np.random.shuffle(indices)
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -181,7 +178,6 @@ class ALSTM(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@ -194,7 +190,6 @@ class ALSTM(Model):
indices = np.arange(len(x_values))
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -217,7 +212,6 @@ class ALSTM(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],
@ -282,7 +276,6 @@ class ALSTM(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:


@ -156,7 +156,6 @@ class ALSTM(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -165,10 +164,9 @@ class ALSTM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, data_loader):
self.ALSTM_model.train()
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@ -181,14 +179,12 @@ class ALSTM(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.ALSTM_model.eval()
scores = []
losses = []
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
# feature[torch.isnan(feature)] = 0
label = data[:, -1, -1].to(self.device)
@ -295,7 +291,6 @@ class ALSTM(Model):
preds = []
for data in test_loader:
feature = data[:, :, 0:-1].to(self.device)
with torch.no_grad():


@ -154,7 +154,6 @@ class GATs(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -175,7 +174,6 @@ class GATs(Model):
return daily_index, daily_count
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
self.GAT_model.train()
@ -197,7 +195,6 @@ class GATs(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@ -230,7 +227,6 @@ class GATs(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],


@ -32,7 +32,6 @@ class DailyBatchSampler(Sampler):
self.daily_index[0] = 0
def __iter__(self):
for idx, count in zip(self.daily_index, self.daily_count):
yield np.arange(idx, idx + count)
@ -173,7 +172,6 @@ class GATs(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -194,11 +192,9 @@ class GATs(Model):
return daily_index, daily_count
def train_epoch(self, data_loader):
self.GAT_model.train()
for data in data_loader:
data = data.squeeze()
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@ -212,14 +208,12 @@ class GATs(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.GAT_model.eval()
scores = []
losses = []
for data in data_loader:
data = data.squeeze()
feature = data[:, :, 0:-1].to(self.device)
# feature[torch.isnan(feature)] = 0
@ -240,7 +234,6 @@ class GATs(Model):
evals_result=dict(),
save_path=None,
):
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
if dl_train.empty or dl_valid.empty:
@ -329,7 +322,6 @@ class GATs(Model):
preds = []
for data in test_loader:
data = data.squeeze()
feature = data[:, :, 0:-1].to(self.device)


@ -146,7 +146,6 @@ class GRU(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -155,7 +154,6 @@ class GRU(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@ -165,7 +163,6 @@ class GRU(Model):
np.random.shuffle(indices)
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -181,7 +178,6 @@ class GRU(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@ -194,7 +190,6 @@ class GRU(Model):
indices = np.arange(len(x_values))
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -217,7 +212,6 @@ class GRU(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],
@ -282,7 +276,6 @@ class GRU(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:


@ -154,7 +154,6 @@ class GRU(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -163,10 +162,9 @@ class GRU(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, data_loader):
self.GRU_model.train()
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@ -179,14 +177,12 @@ class GRU(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.GRU_model.eval()
scores = []
losses = []
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
# feature[torch.isnan(feature)] = 0
label = data[:, -1, -1].to(self.device)
@ -293,7 +289,6 @@ class GRU(Model):
preds = []
for data in test_loader:
feature = data[:, :, 0:-1].to(self.device)
with torch.no_grad():


@ -160,7 +160,6 @@ class HIST(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric == "ic":
@ -189,7 +188,6 @@ class HIST(Model):
return daily_index, daily_count
def train_epoch(self, x_train, y_train, stock_index):
stock2concept_matrix = np.load(self.stock2concept)
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@ -214,7 +212,6 @@ class HIST(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y, stock_index):
# prepare training data
stock2concept_matrix = np.load(self.stock2concept)
x_values = data_x.values


@ -153,7 +153,6 @@ class IGMTF(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric == "ic":
@ -201,7 +200,6 @@ class IGMTF(Model):
return train_hidden, train_hidden_day
def train_epoch(self, x_train, y_train, train_hidden, train_hidden_day):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@ -222,7 +220,6 @@ class IGMTF(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y, train_hidden, train_hidden_day):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@ -254,7 +251,6 @@ class IGMTF(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid = dataset.prepare(
["train", "valid"],
col_set=["feature", "label"],


@ -46,7 +46,6 @@ class LocalformerModel(Model):
seed=None,
**kwargs
):
# set hyper-parameters.
self.d_model = d_model
self.dropout = dropout
@ -96,7 +95,6 @@ class LocalformerModel(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -105,7 +103,6 @@ class LocalformerModel(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@ -115,7 +112,6 @@ class LocalformerModel(Model):
np.random.shuffle(indices)
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -131,7 +127,6 @@ class LocalformerModel(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@ -144,7 +139,6 @@ class LocalformerModel(Model):
indices = np.arange(len(x_values))
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -167,7 +161,6 @@ class LocalformerModel(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],
@ -232,7 +225,6 @@ class LocalformerModel(Model):
preds = []
for begin in range(sample_num)[:: self.batch_size]:
if sample_num - begin < self.batch_size:
end = sample_num
else:


@ -44,7 +44,6 @@ class LocalformerModel(Model):
seed=None,
**kwargs
):
# set hyper-parameters.
self.d_model = d_model
self.dropout = dropout
@ -96,7 +95,6 @@ class LocalformerModel(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -105,7 +103,6 @@ class LocalformerModel(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, data_loader):
self.model.train()
for data in data_loader:
@ -121,14 +118,12 @@ class LocalformerModel(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.model.eval()
scores = []
losses = []
for data in data_loader:
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@ -148,7 +143,6 @@ class LocalformerModel(Model):
evals_result=dict(),
save_path=None,
):
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
if dl_train.empty or dl_valid.empty:


@ -142,7 +142,6 @@ class LSTM(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -151,7 +150,6 @@ class LSTM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, x_train, y_train):
x_train_values = x_train.values
y_train_values = np.squeeze(y_train.values)
@ -161,7 +159,6 @@ class LSTM(Model):
np.random.shuffle(indices)
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -177,7 +174,6 @@ class LSTM(Model):
self.train_optimizer.step()
def test_epoch(self, data_x, data_y):
# prepare training data
x_values = data_x.values
y_values = np.squeeze(data_y.values)
@ -190,7 +186,6 @@ class LSTM(Model):
indices = np.arange(len(x_values))
for i in range(len(indices))[:: self.batch_size]:
if len(indices) - i < self.batch_size:
break
@ -212,7 +207,6 @@ class LSTM(Model):
evals_result=dict(),
save_path=None,
):
df_train, df_valid, df_test = dataset.prepare(
["train", "valid", "test"],
col_set=["feature", "label"],


@ -150,7 +150,6 @@ class LSTM(Model):
raise ValueError("unknown loss `%s`" % self.loss)
def metric_fn(self, pred, label):
mask = torch.isfinite(label)
if self.metric in ("", "loss"):
@ -159,10 +158,9 @@ class LSTM(Model):
raise ValueError("unknown metric `%s`" % self.metric)
def train_epoch(self, data_loader):
self.LSTM_model.train()
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
label = data[:, -1, -1].to(self.device)
@ -175,14 +173,12 @@ class LSTM(Model):
self.train_optimizer.step()
def test_epoch(self, data_loader):
self.LSTM_model.eval()
scores = []
losses = []
for (data, weight) in data_loader:
for data, weight in data_loader:
feature = data[:, :, 0:-1].to(self.device)
# feature[torch.isnan(feature)] = 0
label = data[:, -1, -1].to(self.device)
@ -288,7 +284,6 @@ class LSTM(Model):
preds = []
for data in test_loader:
feature = data[:, :, 0:-1].to(self.device)
with torch.no_grad():

Some files were not shown because too many files changed in this diff.