зеркало из https://github.com/microsoft/qlib.git
fix get data error (#1793)
* fix get data error * fix get v0 data error * optimize get_data code * fix pylint error * add comments
This commit is contained in:
Родитель
63021018d6
Коммит
155f80323c
|
@ -16,7 +16,7 @@ Current version of script with default value tries to connect localhost **via de
|
||||||
|
|
||||||
Run following command to install necessary libraries
|
Run following command to install necessary libraries
|
||||||
```
|
```
|
||||||
pip install pytest coverage
|
pip install pytest coverage gdown
|
||||||
pip install arctic # NOTE: pip may fail to resolve the right package dependency !!! Please make sure the dependency are satisfied.
|
pip install arctic # NOTE: pip may fail to resolve the right package dependency !!! Please make sure the dependency are satisfied.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -27,7 +27,8 @@ pip install arctic # NOTE: pip may fail to resolve the right package dependency
|
||||||
2. Please follow following steps to download example data
|
2. Please follow following steps to download example data
|
||||||
```bash
|
```bash
|
||||||
cd examples/orderbook_data/
|
cd examples/orderbook_data/
|
||||||
python ../../scripts/get_data.py download_data --target_dir . --file_name highfreq_orderbook_example_data.zip
|
gdown https://drive.google.com/uc?id=15nZF7tFT_eKVZAcMFL1qPS4jGyJflH7e # Proxies may be necessary here.
|
||||||
|
python ../../scripts/get_data.py _unzip --file_path highfreq_orderbook_example_data.zip --target_dir .
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Please import the example data to your mongo db
|
3. Please import the example data to your mongo db
|
||||||
|
|
|
@ -12,15 +12,11 @@ import datetime
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from cryptography.fernet import Fernet
|
|
||||||
from qlib.utils import exists_qlib_data
|
from qlib.utils import exists_qlib_data
|
||||||
|
|
||||||
|
|
||||||
class GetData:
|
class GetData:
|
||||||
REMOTE_URL = "https://qlibpublic.blob.core.windows.net/data/default/stock_data"
|
REMOTE_URL = "https://github.com/SunsetWolf/qlib_dataset/releases/download"
|
||||||
# "?" is not included in the token.
|
|
||||||
TOKEN = b"gAAAAABkmDhojHc0VSCDdNK1MqmRzNLeDFXe5hy8obHpa6SDQh4de6nW5gtzuD-fa6O_WZb0yyqYOL7ndOfJX_751W3xN5YB4-n-P22jK-t6ucoZqhT70KPD0Lf0_P328QPJVZ1gDnjIdjhi2YLOcP4BFTHLNYO0mvzszR8TKm9iT5AKRvuysWnpi8bbYwGU9zAcJK3x9EPL43hOGtxliFHcPNGMBoJW4g_ercdhi0-Qgv5_JLsV-29_MV-_AhuaYvJuN2dEywBy"
|
|
||||||
KEY = "EYcA8cgorA8X9OhyMwVfuFxn_1W3jGk6jCbs3L2oPoA="
|
|
||||||
|
|
||||||
def __init__(self, delete_zip_file=False):
|
def __init__(self, delete_zip_file=False):
|
||||||
"""
|
"""
|
||||||
|
@ -33,9 +29,45 @@ class GetData:
|
||||||
self.delete_zip_file = delete_zip_file
|
self.delete_zip_file = delete_zip_file
|
||||||
|
|
||||||
def merge_remote_url(self, file_name: str):
|
def merge_remote_url(self, file_name: str):
|
||||||
fernet = Fernet(self.KEY)
|
"""
|
||||||
token = fernet.decrypt(self.TOKEN).decode()
|
Generate download links.
|
||||||
return f"{self.REMOTE_URL}/{file_name}?{token}"
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
file_name: str
|
||||||
|
The name of the file to be downloaded.
|
||||||
|
The file name can be accompanied by a version number, (e.g.: v2/qlib_data_simple_cn_1d_latest.zip),
|
||||||
|
if no version number is attached, it will be downloaded from v0 by default.
|
||||||
|
"""
|
||||||
|
return f"{self.REMOTE_URL}/{file_name}" if "/" in file_name else f"{self.REMOTE_URL}/v0/{file_name}"
|
||||||
|
|
||||||
|
def download(self, url: str, target_path: [Path, str]):
|
||||||
|
"""
|
||||||
|
Download a file from the specified url.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
url: str
|
||||||
|
The url of the data.
|
||||||
|
target_path: str
|
||||||
|
The location where the data is saved, including the file name.
|
||||||
|
"""
|
||||||
|
file_name = str(target_path).rsplit("/", maxsplit=1)[-1]
|
||||||
|
resp = requests.get(url, stream=True, timeout=60)
|
||||||
|
resp.raise_for_status()
|
||||||
|
if resp.status_code != 200:
|
||||||
|
raise requests.exceptions.HTTPError()
|
||||||
|
|
||||||
|
chunk_size = 1024
|
||||||
|
logger.warning(
|
||||||
|
f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)"
|
||||||
|
)
|
||||||
|
logger.info(f"{os.path.basename(file_name)} downloading......")
|
||||||
|
with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar:
|
||||||
|
with target_path.open("wb") as fp:
|
||||||
|
for chunk in resp.iter_content(chunk_size=chunk_size):
|
||||||
|
fp.write(chunk)
|
||||||
|
p_bar.update(chunk_size)
|
||||||
|
|
||||||
def download_data(self, file_name: str, target_dir: [Path, str], delete_old: bool = True):
|
def download_data(self, file_name: str, target_dir: [Path, str], delete_old: bool = True):
|
||||||
"""
|
"""
|
||||||
|
@ -70,21 +102,7 @@ class GetData:
|
||||||
target_path = target_dir.joinpath(_target_file_name)
|
target_path = target_dir.joinpath(_target_file_name)
|
||||||
|
|
||||||
url = self.merge_remote_url(file_name)
|
url = self.merge_remote_url(file_name)
|
||||||
resp = requests.get(url, stream=True, timeout=60)
|
self.download(url=url, target_path=target_path)
|
||||||
resp.raise_for_status()
|
|
||||||
if resp.status_code != 200:
|
|
||||||
raise requests.exceptions.HTTPError()
|
|
||||||
|
|
||||||
chunk_size = 1024
|
|
||||||
logger.warning(
|
|
||||||
f"The data for the example is collected from Yahoo Finance. Please be aware that the quality of the data might not be perfect. (You can refer to the original data source: https://finance.yahoo.com/lookup.)"
|
|
||||||
)
|
|
||||||
logger.info(f"{os.path.basename(file_name)} downloading......")
|
|
||||||
with tqdm(total=int(resp.headers.get("Content-Length", 0))) as p_bar:
|
|
||||||
with target_path.open("wb") as fp:
|
|
||||||
for chunk in resp.iter_content(chunk_size=chunk_size):
|
|
||||||
fp.write(chunk)
|
|
||||||
p_bar.update(chunk_size)
|
|
||||||
|
|
||||||
self._unzip(target_path, target_dir, delete_old)
|
self._unzip(target_path, target_dir, delete_old)
|
||||||
if self.delete_zip_file:
|
if self.delete_zip_file:
|
||||||
|
@ -99,7 +117,9 @@ class GetData:
|
||||||
return status
|
return status
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _unzip(file_path: Path, target_dir: Path, delete_old: bool = True):
|
def _unzip(file_path: [Path, str], target_dir: [Path, str], delete_old: bool = True):
|
||||||
|
file_path = Path(file_path)
|
||||||
|
target_dir = Path(target_dir)
|
||||||
if delete_old:
|
if delete_old:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"will delete the old qlib data directory(features, instruments, calendars, features_cache, dataset_cache): {target_dir}"
|
f"will delete the old qlib data directory(features, instruments, calendars, features_cache, dataset_cache): {target_dir}"
|
||||||
|
|
Загрузка…
Ссылка в новой задаче