diff --git a/infra/data-pipeline.yml b/infra/data-pipeline.yml
index 691a663f..6d3b0e87 100644
--- a/infra/data-pipeline.yml
+++ b/infra/data-pipeline.yml
@@ -215,12 +215,6 @@ tasks:
                python test_scheduling_history_retriever.py retrieve"
         artifacts:
-            public/adr_cache.tar.zst:
-                path: /builds/worker/data/adr_cache.tar.zst
-                type: file
-            public/adr_cache.tar.version:
-                path: /builds/worker/data/adr_cache.tar.version
-                type: file
             public/push_data_label.json.zst:
                 path: /builds/worker/push_data_label.json.zst
                 type: file
diff --git a/scripts/test_scheduling_history_retriever.py b/scripts/test_scheduling_history_retriever.py
index fd6eb096..b3d28532 100644
--- a/scripts/test_scheduling_history_retriever.py
+++ b/scripts/test_scheduling_history_retriever.py
@@ -4,6 +4,7 @@
 import argparse
 import json
 import os
 import subprocess
+import time
 from datetime import datetime
 from logging import INFO, basicConfig, getLogger
@@ -56,9 +57,16 @@ def rename_tasks(tasks):
 class Retriever(object):
     def __init__(self):
         os.makedirs("data", exist_ok=True)
+        self.cache_path = os.path.splitext(ADR_CACHE_DB)[0]
 
     def run_ci_recipes(self, runnable, from_months):
-        subprocess.run(
+        def upload_adr_cache():
+            with open_tar_zst(f"{ADR_CACHE_DB}.zst") as tar:
+                tar.add(self.cache_path)
+
+            db.upload(ADR_CACHE_DB)
+
+        proc = subprocess.Popen(
             [
                 "run-adr",
                 "--ref",
@@ -80,13 +88,20 @@ class Retriever(object):
                 "--runnable",
                 runnable,
             ],
-            check=True,
             stdout=subprocess.DEVNULL,  # Redirect to /dev/null, as the logs are too big otherwise.
         )
 
+        elapsed = 0
+        while proc.poll() is None:
+            time.sleep(6)
+            elapsed += 6
+            if elapsed % 3600 == 0:
+                upload_adr_cache()
+
+        upload_adr_cache()
+
     def retrieve_push_data(self):
         # Download previous cache.
-        cache_path = os.path.splitext(ADR_CACHE_DB)[0]
         db.download(ADR_CACHE_DB)
 
         # Setup adr cache configuration.
@@ -94,7 +109,7 @@ class Retriever(object):
         with open(os.path.expanduser("~/.config/adr/config.toml"), "w") as f:
             f.write(
                 f"""[adr.cache.stores]
-file = {{ driver = "file", path = "{os.path.abspath(cache_path)}" }}
+file = {{ driver = "file", path = "{os.path.abspath(self.cache_path)}" }}
 """
             )
 
@@ -107,9 +122,6 @@ file = {{ driver = "file", path = "{os.path.abspath(cache_path)}" }}
         # ActiveData and we'll see if it's enough to train a satisfying model.
         self.run_ci_recipes("group", 3)
 
-        with open_tar_zst(f"{ADR_CACHE_DB}.zst") as tar:
-            tar.add(cache_path)
-
         zstd_compress("push_data_label.json")
         zstd_compress("push_data_group.json")
 