* add_test_pit

* add_test_pit_to_tests

* add_baostock_to_setup

* add_pip_to_CI

Co-authored-by: Linlang Lv (iSoftStone) <v-linlanglv@microsoft.com>
This commit is contained in:
Linlang 2022-05-06 16:47:20 +08:00 коммит произвёл GitHub
Родитель 9e381493c2
Коммит 2cf842bcfe
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файла: 55 добавлений и 17 удалений

1
.github/workflows/test.yml поставляемый
Просмотреть файл

@ -122,6 +122,7 @@ jobs:
- name: Unit tests with Pytest - name: Unit tests with Pytest
run: | run: |
pip install -r scripts/data_collector/pit/requirements.txt
cd tests cd tests
python -m pytest . --durations=10 python -m pytest . --durations=10

1
.github/workflows/test_macos.yml поставляемый
Просмотреть файл

@ -83,6 +83,7 @@ jobs:
python -m pip install black pytest python -m pip install black pytest
- name: Unit tests with Pytest - name: Unit tests with Pytest
run: | run: |
pip install -r scripts/data_collector/pit/requirements.txt
cd tests cd tests
python -m pytest . --durations=0 python -m pytest . --durations=0
- name: Test workflow by config (install from source) - name: Test workflow by config (install from source)

Просмотреть файл

@ -1,28 +1,64 @@
# Copyright (c) Microsoft Corporation. # Copyright (c) Microsoft Corporation.
# Licensed under the MIT License. # Licensed under the MIT License.
import pandas as pd
import sys
import qlib import qlib
from qlib.data import D import shutil
import unittest import unittest
import pandas as pd
import baostock as bs
from pathlib import Path
from qlib.data import D
from scripts.get_data import GetData
from scripts.dump_pit import DumpPitData
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts/data_collector/pit")))
from collector import Run
pd.set_option("display.width", 1000) pd.set_option("display.width", 1000)
pd.set_option("display.max_columns", None) pd.set_option("display.max_columns", None)
DATA_DIR = Path(__file__).parent.joinpath("test_pit_data")
SOURCE_DIR = DATA_DIR.joinpath("stock_data/source")
SOURCE_DIR.mkdir(exist_ok=True, parents=True)
QLIB_DIR = DATA_DIR.joinpath("qlib_data")
QLIB_DIR.mkdir(exist_ok=True, parents=True)
class TestPIT(unittest.TestCase): class TestPIT(unittest.TestCase):
""" @classmethod
NOTE!!!!!! def tearDownClass(cls) -> None:
The assert of this test assumes that users follows the cmd below and only download 2 stock. shutil.rmtree(str(DATA_DIR.resolve()))
1. `python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn`
2. `python scripts/data_collector/pit/collector.py download_data --source_dir ~/.qlib/stock_data/source/pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_regex "^(600519|000725).*"` @classmethod
3. `python scripts/data_collector/pit/collector.py normalize_data --interval quarterly --source_dir ~/.qlib/stock_data/source/pit --normalize_dir ~/.qlib/stock_data/source/pit_normalized` def setUpClass(cls) -> None:
4. `python scripts/dump_pit.py dump --csv_path ~/.qlib/stock_data/source/pit_normalized --qlib_dir ~/.qlib/qlib_data/cn_data --interval quarterly` cn_data_dir = str(QLIB_DIR.joinpath("cn_data").resolve())
""" pit_dir = str(SOURCE_DIR.joinpath("pit").resolve())
pit_normalized_dir = str(SOURCE_DIR.joinpath("pit_normalized").resolve())
GetData().qlib_data(name="qlib_data_simple", target_dir=cn_data_dir, region="cn")
bs.login()
Run(
source_dir=pit_dir,
interval="quarterly",
).download_data(start="2000-01-01", end="2020-01-01", symbol_regex="^(600519|000725).*")
Run(
source_dir=pit_dir,
normalize_dir=pit_normalized_dir,
interval="quarterly",
).normalize_data()
bs.logout()
DumpPitData(
csv_path=pit_normalized_dir,
qlib_dir=cn_data_dir,
).dump(interval="quarterly")
def setUp(self): def setUp(self):
# qlib.init(kernels=1) # NOTE: set kernel to 1 to make it debug easier # qlib.init(kernels=1) # NOTE: set kernel to 1 to make it debug easier
qlib.init() provider_uri = str(QLIB_DIR.joinpath("cn_data").resolve())
qlib.init(provider_uri=provider_uri)
def to_str(self, obj): def to_str(self, obj):
return "".join(str(obj).split()) return "".join(str(obj).split())
@ -66,7 +102,7 @@ class TestPIT(unittest.TestCase):
data["$close"] = 1 # in case of different dataset gives different values data["$close"] = 1 # in case of different dataset gives different values
expect = """ expect = """
P($$roewa_q) P($$yoyni_q) $close P($$roewa_q) P($$yoyni_q) $close
instrument datetime instrument datetime
sh600519 2019-01-02 0.25522 0.243892 1 sh600519 2019-01-02 0.25522 0.243892 1
2019-01-03 0.25522 0.243892 1 2019-01-03 0.25522 0.243892 1
2019-01-04 0.25522 0.243892 1 2019-01-04 0.25522 0.243892 1
@ -78,7 +114,7 @@ class TestPIT(unittest.TestCase):
2019-07-17 NaN NaN 1 2019-07-17 NaN NaN 1
2019-07-18 NaN NaN 1 2019-07-18 NaN NaN 1
2019-07-19 NaN NaN 1 2019-07-19 NaN NaN 1
[266 rows x 3 columns] [266 rows x 3 columns]
""" """
self.check_same(data, expect) self.check_same(data, expect)
@ -191,7 +227,7 @@ class TestPIT(unittest.TestCase):
data = D.features(instruments, fields, start_time="2019-01-01", end_time="2020-01-01", freq="day") data = D.features(instruments, fields, start_time="2019-01-01", end_time="2020-01-01", freq="day")
except_data = """ except_data = """
P($$roewa_q) P($$yoyni_q) P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1) P(Sum($$yoyni_q, 4)) $close P($$roewa_q) * $close P($$roewa_q) P($$yoyni_q) P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1) P(Sum($$yoyni_q, 4)) $close P($$roewa_q) * $close
instrument datetime instrument datetime
sh600519 2019-01-02 0.255220 0.243892 1.484224 1.661578 63.595333 16.230801 sh600519 2019-01-02 0.255220 0.243892 1.484224 1.661578 63.595333 16.230801
2019-01-03 0.255220 0.243892 1.484224 1.661578 62.641907 15.987467 2019-01-03 0.255220 0.243892 1.484224 1.661578 62.641907 15.987467
2019-01-04 0.255220 0.243892 1.484224 1.661578 63.915985 16.312637 2019-01-04 0.255220 0.243892 1.484224 1.661578 63.915985 16.312637
@ -203,7 +239,7 @@ class TestPIT(unittest.TestCase):
2019-12-27 0.255819 0.219821 0.677052 1.081693 125.307404 32.056015 2019-12-27 0.255819 0.219821 0.677052 1.081693 125.307404 32.056015
2019-12-30 0.255819 0.219821 0.677052 1.081693 127.763992 32.684456 2019-12-30 0.255819 0.219821 0.677052 1.081693 127.763992 32.684456
2019-12-31 0.255819 0.219821 0.677052 1.081693 127.462303 32.607277 2019-12-31 0.255819 0.219821 0.677052 1.081693 127.462303 32.607277
[244 rows x 6 columns] [244 rows x 6 columns]
""" """
self.check_same(data, except_data) self.check_same(data, except_data)
@ -219,7 +255,7 @@ class TestPIT(unittest.TestCase):
data = D.features(instruments, fields, start_time="2018-04-28", end_time="2019-07-19", freq="day") data = D.features(instruments, fields, start_time="2018-04-28", end_time="2019-07-19", freq="day")
except_data = """ except_data = """
PRef($$roewa_q, 201902) PRef($$yoyni_q, 201801) P($$roewa_q) P($$roewa_q) / PRef($$roewa_q, 201801) PRef($$roewa_q, 201902) PRef($$yoyni_q, 201801) P($$roewa_q) P($$roewa_q) / PRef($$roewa_q, 201801)
instrument datetime instrument datetime
sh600519 2018-05-02 NaN 0.395075 0.088887 1.000000 sh600519 2018-05-02 NaN 0.395075 0.088887 1.000000
2018-05-03 NaN 0.395075 0.088887 1.000000 2018-05-03 NaN 0.395075 0.088887 1.000000
2018-05-04 NaN 0.395075 0.088887 1.000000 2018-05-04 NaN 0.395075 0.088887 1.000000
@ -231,7 +267,7 @@ class TestPIT(unittest.TestCase):
2019-07-17 0.000000 0.395075 0.000000 0.000000 2019-07-17 0.000000 0.395075 0.000000 0.000000
2019-07-18 0.175322 0.395075 0.175322 1.972414 2019-07-18 0.175322 0.395075 0.175322 1.972414
2019-07-19 0.175322 0.395075 0.175322 1.972414 2019-07-19 0.175322 0.395075 0.175322 1.972414
[299 rows x 4 columns] [299 rows x 4 columns]
""" """
self.check_same(data, except_data) self.check_same(data, except_data)