Fix data-cycling tests, guard median against empty input, and schedule the calculate-eta task

2014-05-14 04:34:01 +00:00 · 2014-05-14 04:34:01 +00:00 · adfd3cff49
--- a/tests/jobs_test.json
+++ b/tests/jobs_test.json
@ -3,10 +3,20 @@
        "get_jobs_for_cycling": {

            "sql": "SELECT id FROM `job` WHERE `result_set_id` IN (
-                        SELECT id FROM result_set WHERE push_timestamp = ?
+                        SELECT id FROM result_set WHERE push_timestamp >= ?
                    )",
            "host": "master_host"
        },
+        "get_result_set_jobs":{
+
+            "sql":"SELECT j.id, j.job_guid
+                   FROM result_set AS r
+                   INNER JOIN job AS j
+                        ON r.id = j.result_set_id
+                   WHERE r.id = ?",
+
+            "host": "master_host"
+        },
        "job": {
            "sql": "SELECT * FROM `job` WHERE id = ?",
            "host": "master_host"
@ -71,7 +81,7 @@
                "host":"master_host"
        },
        "set_result_sets_push_timestamp":{
-            "sql":"UPDATE `result_set` SET `push_timestamp` = ? WHERE `push_timestamp` > ?",
+            "sql":"UPDATE `result_set` SET `push_timestamp` = ?",

            "host":"master_host"
        },
--- a/tests/model/derived/test_jobs_model.py
+++ b/tests/model/derived/test_jobs_model.py
@ -67,11 +67,11 @@ def test_cycle_all_data(jm, refdata, sample_data, initial_data, sample_resultset
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

    # build a date that will cause the data to be cycled
-    cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL + 100))
+    cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL + 100000))

    jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.updates.set_result_sets_push_timestamp",
-        placeholders=[cycle_date_ts, cycle_date_ts]
+        placeholders=[cycle_date_ts]
    )

    jobs_to_be_deleted = jm.get_dhub(jm.CT_JOBS).execute(
@ -105,8 +105,15 @@ def test_cycle_one_job(jm, refdata, sample_data, initial_data, sample_resultset,
    job_data = sample_data.job_data[:20]
    test_utils.do_job_ingestion(jm, refdata, job_data, sample_resultset, False)

+    # Set every result_set's push_timestamp to a time too recent to be cycled
+    non_cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL - 100000))
+    jm.get_dhub(jm.CT_JOBS).execute(
+        proc="jobs_test.updates.set_result_sets_push_timestamp",
+        placeholders=[ non_cycle_date_ts ]
+    )
+
    # build a date that will cause the data to be cycled
-    cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL + 100))
+    cycle_date_ts = int(time.time() - (jm.DATA_CYCLE_INTERVAL + 100000))

    jm.get_dhub(jm.CT_JOBS).execute(
        proc="jobs_test.updates.set_one_result_set_push_timestamp",
@ -114,24 +121,27 @@ def test_cycle_one_job(jm, refdata, sample_data, initial_data, sample_resultset,
    )

    jobs_to_be_deleted = jm.get_dhub(jm.CT_JOBS).execute(
-        proc="jobs_test.selects.get_jobs_for_cycling",
-        placeholders=[cycle_date_ts]
+        proc="jobs_test.selects.get_result_set_jobs",
+        placeholders=[1]
    )

    job_count = len(jobs_to_be_deleted)

-    jobs_before = jm.get_dhub(jm.CT_JOBS).execute(proc="jobs_test.selects.jobs")
-
    sql_targets = jm.cycle_data({}, False)

-    jobs_after = jm.get_dhub(jm.CT_JOBS).execute(proc="jobs_test.selects.jobs")
+    assert sql_targets['jobs.deletes.cycle_job'] == job_count
+
+    # Confirm that the target result set no longer has any jobs in the
+    # job table
+    jobs_count_after_delete = jm.get_dhub(jm.CT_JOBS).execute(
+        proc="jobs_test.selects.get_result_set_jobs",
+        placeholders=[1]
+    )
+
+    assert len(jobs_count_after_delete) == 0

    jm.disconnect()

-    assert len(jobs_before) - len(jobs_after) == job_count
-
-    assert sql_targets['jobs.deletes.cycle_job'] == job_count
-
 def test_bad_date_value_ingestion(jm, initial_data, mock_log_parser):
    """
    Test ingesting an blob with bad date value
--- a/treeherder/model/derived/jobs.py
+++ b/treeherder/model/derived/jobs.py
@ -530,12 +530,15 @@ class JobsModel(TreeherderModelBase):

        length = len(sorted_list)

+        if length == 0:
+            return 0
+
        # Cannot take the median with only on sample,
        # return it
-        if length == 1:
+        elif length == 1:
            return sorted_list[0]

-        if not length % 2:
+        elif not length % 2:
            return round(
                (sorted_list[length / 2] + sorted_list[length / 2 - 1]) / 2, 0
                    )
--- a/treeherder/model/fixtures/tasks.json
+++ b/treeherder/model/fixtures/tasks.json
@ -153,8 +153,8 @@
            "args": "[]",
            "enabled": true,
            "routing_key": null,
-            "crontab": null,
-            "interval": 1,
+            "crontab": 3,
+            "interval": null,
            "queue": null,
            "total_run_count": 0,
            "expires": null,
@ -183,5 +183,26 @@
            "date_changed": "2014-04-24T16:22:39",
            "description": "Broadcasts the count of unclassified job failures per repository"
        }
+    },
+    {
+        "pk": 10,
+        "model": "djcelery.periodictask",
+        "fields": {
+            "task": "calculate-eta",
+            "name": "calculate-eta",
+            "exchange": null,
+            "last_run_at": null,
+            "args": "[]",
+            "enabled": true,
+            "routing_key": null,
+            "crontab": 2,
+            "interval": null,
+            "queue": null,
+            "total_run_count": 0,
+            "expires": null,
+            "kwargs": "{}",
+            "date_changed": "2014-04-24T16:22:39",
+            "description": "Compute pending and running eta times"
+        }
    }
 ]
--- a/treeherder/model/sql/template_schema/project_jobs_1.sql.tmpl
+++ b/treeherder/model/sql/template_schema/project_jobs_1.sql.tmpl
@ -94,8 +94,8 @@ DROP TABLE IF EXISTS `job`;
 *  submit_timestamp - Time the job was submitted.
 *  start_timestamp - Time the job was started.
 *  end_timestamp - Time the job completed.
- *  pending_eta - ETA to a running state. A rolling average over a 12 hr window of start_timestamp - submit_timestamp, recomputed every 30 min.
- *  running_eta - ETA to a completed state. A rolling average over a 12 hr window of end_timestamp - start_timestamp, recomputed every 30 min.
+ *  pending_eta - ETA to a running state. A rolling average over a 6 hr window of start_timestamp - submit_timestamp, recomputed every 30 min.
+ *  running_eta - ETA to a completed state. A rolling average over a 6 hr window of end_timestamp - start_timestamp, recomputed every 30 min.
 **************************/
 CREATE TABLE `job` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
--- a/treeherder/model/tasks.py
+++ b/treeherder/model/tasks.py
@ -71,6 +71,7 @@ def unclassified_failure_count():

    unclassified_failure_publisher.disconnect()

+@task(name='calculate-eta', rate_limit='1/h')
 def calculate_eta(sample_window_seconds=21600, debug=False):

    projects = Repository.objects.all().values_list('name', flat=True)
--- a/treeherder/settings/base.py
+++ b/treeherder/settings/base.py
@ -4,6 +4,7 @@ from treeherder import path

 # needed to setup celery
 import djcelery
+from celery.schedules import crontab
 djcelery.setup_loader()

 # These settings can all be optionally set via env vars, or in local.py:
@ -202,6 +203,23 @@ CELERY_DEFAULT_ROUTING_KEY = 'default'

 CELERYBEAT_SCHEDULER = "djcelery.schedulers.DatabaseScheduler"

+CELERYBEAT_SCHEDULE = {
+    # Each 'task' value must equal the name the task is registered under.
+    'add-every-24-hours':{
+        # NOTE(review): confirm cycle-data is registered as 'tasks.cycle-data'
+        'task':'tasks.cycle-data',
+        # Execute daily at midnight
+        'schedule': crontab(minute=0, hour=0)
+    },
+
+    'add-every-6-hours':{
+        # Registered in treeherder/model/tasks.py with name='calculate-eta'
+        'task':'calculate-eta',
+        # Execute every 6 hours
+        'schedule':crontab(minute=0, hour='*/6')
+    }
+}
+
 # rest-framework settings
 REST_FRAMEWORK = {
    'DEFAULT_PARSER_CLASSES': (