* More dataloader example

* optimize code

* optimize code

* optimize code

* optimize code

* optimize code

* optimize code

* fix pylint error

* fix CI error

* fix CI error

* Comments

* fix error type

---------

Co-authored-by: Young <afe.young@gmail.com>
This commit is contained in:
Linlang 2024-07-10 14:48:44 +08:00 коммит произвёл GitHub
Родитель a7d5a9b500
Коммит 2c33332dd6
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
5 изменённых файлов: 48 добавлений и 5 удалений

2
.github/workflows/test_qlib_from_pip.yml поставляемый
Просмотреть файл

@ -16,7 +16,7 @@ jobs:
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

2
.github/workflows/test_qlib_from_source.yml поставляемый
Просмотреть файл

@ -17,7 +17,7 @@ jobs:
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

Просмотреть файл

@ -17,7 +17,7 @@ jobs:
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
# not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

Просмотреть файл

@ -41,6 +41,7 @@ class DataLoader(abc.ABC):
----------
instruments : str or dict
it can either be the market name or the config file of instruments generated by InstrumentProvider.
If the value of instruments is None, it means that no filtering is done.
start_time : str
start of the time range.
end_time : str
@ -50,6 +51,11 @@ class DataLoader(abc.ABC):
-------
pd.DataFrame:
data loaded from the underlying source
Raises
------
KeyError:
if the instruments filter is not supported, raise KeyError
"""
@ -320,7 +326,13 @@ class NestedDataLoader(DataLoader):
def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
df_full = None
for dl in self.data_loader_l:
df_current = dl.load(instruments, start_time, end_time)
try:
df_current = dl.load(instruments, start_time, end_time)
except KeyError:
warnings.warn(
"If the value of `instruments` cannot be processed, it will set instruments to None to get all the data."
)
df_current = dl.load(instruments=None, start_time=start_time, end_time=end_time)
if df_full is None:
df_full = df_current
else:

Просмотреть файл

@ -7,8 +7,10 @@ import qlib
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent))
from qlib.data.dataset.loader import NestedDataLoader
from qlib.data.dataset.loader import NestedDataLoader, QlibDataLoader
from qlib.data.dataset.handler import DataHandlerLP
from qlib.contrib.data.loader import Alpha158DL, Alpha360DL
from qlib.data import D
class TestDataLoader(unittest.TestCase):
@ -44,6 +46,35 @@ class TestDataLoader(unittest.TestCase):
assert "LABEL0" in columns_list
# Then you can use it with DataHandler;
# NOTE: please note that the data processors are missing!!! You should add them based on your requirements
"""
dataset.to_pickle("test_df.pkl")
nested_data_loader = NestedDataLoader(
dataloader_l=[
{
"class": "qlib.contrib.data.loader.Alpha158DL",
"kwargs": {"config": {"label": (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])}},
},
{
"class": "qlib.contrib.data.loader.Alpha360DL",
},
{
"class": "qlib.data.dataset.loader.StaticDataLoader",
"kwargs": {"config": "test_df.pkl"},
},
]
)
data_handler_config = {
"start_time": "2008-01-01",
"end_time": "2020-08-01",
"instruments": "csi300",
"data_loader": nested_data_loader,
}
data_handler = DataHandlerLP(**data_handler_config)
data = data_handler.fetch()
print(data)
"""
if __name__ == "__main__":