Bug 1318474 - Add job metadata to ORM schema and start ingesting it (#1998)

William Lachance 2016-11-24 12:56:50 -05:00 committed by GitHub
Parent f34e6fc638
Commit 9d557622f7
14 changed files with 416 additions and 68 deletions

View file

@@ -152,7 +152,7 @@ def test_repository(transactional_db):
@pytest.fixture
def test_job(eleven_job_blobs, jm):
def test_job(failure_classifications, eleven_job_blobs, jm):
from treeherder.model.models import Job
jm.store_job_data(eleven_job_blobs[0:1])
@@ -257,7 +257,7 @@ def eleven_job_blobs(jm, sample_data, sample_resultset, test_repository, mock_lo
@pytest.fixture
def eleven_jobs_stored(jm, eleven_job_blobs):
def eleven_jobs_stored(jm, failure_classifications, eleven_job_blobs):
"""stores a list of 11 job samples"""
jm.store_job_data(eleven_job_blobs)
@@ -345,7 +345,7 @@ def failure_lines(test_job, elasticsearch):
@pytest.fixture
def failure_classifications():
def failure_classifications(transactional_db):
from treeherder.model.models import FailureClassification
for name in ["not classified", "fixed by commit", "expected fail",
"intermittent", "infra", "intermittent needs filing",

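A note on the recurring test change in this commit: fixtures and tests that store jobs now declare failure_classifications as a parameter, so pytest creates the FailureClassification lookup rows before any job that references them is ingested. A minimal sketch of the pattern, with the fixture names taken from the conftest.py hunk above and a hypothetical test body:

import pytest


@pytest.fixture
def failure_classifications(transactional_db):
    from treeherder.model.models import FailureClassification
    for name in ["not classified", "fixed by commit", "intermittent"]:
        FailureClassification.objects.create(name=name)


def test_stores_a_job(failure_classifications, eleven_job_blobs, jm):
    # failure_classifications has already run, so ingestion can resolve
    # the default "not classified" classification by name.
    jm.store_job_data(eleven_job_blobs[0:1])
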
View file

@@ -37,7 +37,8 @@ def completed_jobs(sample_data):
@pytest.fixture
def pending_jobs_stored(
jm, pending_jobs, result_set_stored, mock_post_json):
jm, failure_classifications, pending_jobs, result_set_stored,
mock_post_json):
"""
stores a list of buildapi pending jobs into the jobs store
using BuildApiTreeHerderAdapter
@@ -55,7 +56,8 @@ def pending_jobs_stored(
@pytest.fixture
def running_jobs_stored(
jm, running_jobs, result_set_stored, mock_post_json):
jm, failure_classifications, running_jobs, result_set_stored,
mock_post_json):
"""
stores a list of buildapi running jobs
"""
@@ -71,7 +73,8 @@ def running_jobs_stored(
@pytest.fixture
def completed_jobs_stored(
jm, completed_jobs, result_set_stored, mock_post_json):
jm, failure_classifications, completed_jobs, result_set_stored,
mock_post_json):
"""
stores a list of buildapi completed jobs
"""

View file

@@ -52,8 +52,9 @@ def check_job_log(test_project, job_guid, parse_status):
assert job_logs[0].status == parse_status
def test_post_job_with_unparsed_log(test_project, result_set_stored,
mock_post_json, monkeypatch):
def test_post_job_with_unparsed_log(test_project, failure_classifications,
result_set_stored, mock_post_json,
monkeypatch):
"""
test submitting a job with an unparsed log parses the log,
generates an appropriate set of text log steps, and calls
@@ -99,6 +100,7 @@ def test_post_job_with_unparsed_log(test_project, result_set_stored,
def test_post_job_pending_to_completed_with_unparsed_log(test_project,
result_set_stored,
failure_classifications,
mock_post_json):
job_guid = 'd22c74d4aa6d2a1dcba96d95dccbd5fdca70cf33'
@@ -145,6 +147,7 @@ def test_post_job_pending_to_completed_with_unparsed_log(test_project,
def test_post_job_with_parsed_log(test_project, result_set_stored,
failure_classifications,
mock_post_json,
monkeypatch,
):
@@ -184,8 +187,9 @@ def test_post_job_with_parsed_log(test_project, result_set_stored,
def test_post_job_with_text_log_summary_artifact_parsed(
test_project,
monkeypatch,
failure_classifications,
result_set_stored,
monkeypatch,
mock_post_json,
text_log_summary_dict,
):
@@ -232,8 +236,9 @@ def test_post_job_with_text_log_summary_artifact_parsed(
def test_post_job_with_text_log_summary_artifact_pending(
test_project,
monkeypatch,
failure_classifications,
result_set_stored,
monkeypatch,
mock_post_json,
text_log_summary_dict,
):
@@ -282,8 +287,9 @@ def test_post_job_with_text_log_summary_artifact_pending(
def test_post_job_artifacts_by_add_artifact(
test_project,
monkeypatch,
failure_classifications,
result_set_stored,
monkeypatch,
mock_post_json,
):
"""
@@ -387,7 +393,8 @@ def test_post_job_artifacts_by_add_artifact(
assert mock_parse.called is False
def test_post_job_with_tier(test_project, result_set_stored,
def test_post_job_with_tier(test_project, failure_classifications,
result_set_stored,
mock_post_json):
"""test submitting a job with tier specified"""
@@ -412,7 +419,8 @@ def test_post_job_with_tier(test_project, result_set_stored,
assert job['tier'] == 3
def test_post_job_with_default_tier(test_project, result_set_stored,
def test_post_job_with_default_tier(test_project, failure_classifications,
result_set_stored,
mock_post_json):
"""test submitting a job with no tier specified gets default"""
@@ -436,7 +444,8 @@ def test_post_job_with_default_tier(test_project, result_set_stored,
assert job['tier'] == 1
def test_post_job_with_buildapi_artifact(test_project, result_set_stored,
def test_post_job_with_buildapi_artifact(test_project, failure_classifications,
result_set_stored,
mock_post_json):
"""
test submitting a job with a buildapi artifact gets that stored (and

View file

@@ -7,8 +7,8 @@ from treeherder.perf.models import (PerformanceDatum,
PerformanceSignature)
def test_post_perf_artifact(jobs_ds, test_repository, result_set_stored,
mock_post_json):
def test_post_perf_artifact(jobs_ds, test_repository, failure_classifications,
result_set_stored, mock_post_json):
PerformanceFramework.objects.get_or_create(name='cheezburger', enabled=True)
tjc = client.TreeherderJobCollection()
@@ -52,7 +52,7 @@ def test_post_perf_artifact(jobs_ds, test_repository, result_set_stored,
assert PerformanceDatum.objects.all().count() == 3
def test_post_perf_artifact_revision_hash(test_repository,
def test_post_perf_artifact_revision_hash(test_repository, failure_classifications,
result_set_stored, mock_post_json):
test_repository.save()
PerformanceFramework.objects.get_or_create(name='cheezburger', enabled=True)
@@ -100,6 +100,7 @@ def test_post_perf_artifact_revision_hash(test_repository,
def test_post_perf_artifact_multiple(jobs_ds, test_repository,
failure_classifications,
result_set_stored, mock_post_json):
PerformanceFramework.objects.get_or_create(name='cheezburger', enabled=True)
perfobj = {

View file

@@ -88,6 +88,7 @@ def mock_buildapi_builds4h_missing_branch_url(activate_responses):
def test_ingest_pending_jobs(jm,
result_set_stored,
failure_classifications,
mock_buildapi_pending_url,
mock_log_parser):
"""
@@ -108,6 +109,7 @@ def test_ingest_pending_jobs(jm,
def test_ingest_running_jobs(jm,
result_set_stored,
failure_classifications,
mock_buildapi_running_url,
mock_log_parser):
"""
@@ -128,6 +130,7 @@ def test_ingest_running_jobs(jm,
def test_ingest_builds4h_jobs(jm,
result_set_stored,
failure_classifications,
mock_buildapi_builds4h_url,
mock_log_parser):
"""
@@ -148,6 +151,7 @@ def test_ingest_builds4h_jobs(jm,
def test_ingest_running_to_complete_job(jm,
result_set_stored,
failure_classifications,
mock_buildapi_running_url,
mock_buildapi_builds4h_url,
mock_log_parser):
@@ -179,6 +183,7 @@ def test_ingest_running_to_complete_job(jm,
def test_ingest_running_job_fields(jm,
result_set_stored,
failure_classifications,
mock_buildapi_running_url,
mock_log_parser):
"""
@@ -195,6 +200,7 @@ def test_ingest_running_job_fields(jm,
def test_ingest_builds4h_jobs_1_missing_resultset(jm,
result_set_stored,
failure_classifications,
mock_buildapi_builds4h_missing1_url,
mock_log_parser):
"""
@@ -209,6 +215,7 @@ def test_ingest_builds4h_jobs_1_missing_resultset(jm,
def test_ingest_builds4h_jobs_missing_branch(jm,
result_set_stored,
failure_classifications,
mock_buildapi_builds4h_missing_branch_url,
mock_log_parser):
"""

View file

@@ -45,7 +45,7 @@ def test_job_transformation(pulse_jobs, transformed_pulse_jobs):
def test_ingest_pulse_jobs(pulse_jobs, test_project, jm, result_set_stored,
mock_log_parser):
failure_classifications, mock_log_parser):
"""
Ingest a job through the JSON Schema validated JobLoader used by Pulse
"""
@@ -77,7 +77,7 @@ def test_ingest_pulse_jobs(pulse_jobs, test_project, jm, result_set_stored,
def test_ingest_pulse_jobs_bad_project(pulse_jobs, test_project, jm, result_set_stored,
mock_log_parser):
failure_classifications, mock_log_parser):
"""
Ingest a job through the JSON Schema validated JobLoader used by Pulse
"""
@@ -94,6 +94,7 @@ def test_ingest_pulse_jobs_bad_project(pulse_jobs, test_project, jm, result_set_
def test_ingest_pulse_jobs_with_revision_hash(pulse_jobs, test_project, jm,
result_set_stored,
failure_classifications,
mock_log_parser):
"""
Ingest a revision_hash job with the JobLoader used by Pulse
@@ -129,7 +130,9 @@ def test_ingest_pulse_jobs_with_missing_resultset(pulse_jobs):
assert Job.objects.count() == 0
def test_transition_pending_running_complete(first_job, jm, mock_log_parser):
def test_transition_pending_running_complete(first_job, jm,
failure_classifications,
mock_log_parser):
jl = JobLoader()
change_state_result(first_job, jl, jm, "pending", "unknown", "pending", "unknown")
@@ -137,28 +140,36 @@ def test_transition_pending_running_complete(first_job, jm, mock_log_parser):
change_state_result(first_job, jl, jm, "completed", "fail", "completed", "testfailed")
def test_transition_complete_pending_stays_complete(first_job, jm, mock_log_parser):
def test_transition_complete_pending_stays_complete(first_job, jm,
failure_classifications,
mock_log_parser):
jl = JobLoader()
change_state_result(first_job, jl, jm, "completed", "fail", "completed", "testfailed")
change_state_result(first_job, jl, jm, "pending", "unknown", "completed", "testfailed")
def test_transition_complete_running_stays_complete(first_job, jm, mock_log_parser):
def test_transition_complete_running_stays_complete(first_job, jm,
failure_classifications,
mock_log_parser):
jl = JobLoader()
change_state_result(first_job, jl, jm, "completed", "fail", "completed", "testfailed")
change_state_result(first_job, jl, jm, "running", "unknown", "completed", "testfailed")
def test_transition_running_pending_stays_running(first_job, jm, mock_log_parser):
def test_transition_running_pending_stays_running(first_job, jm,
failure_classifications,
mock_log_parser):
jl = JobLoader()
change_state_result(first_job, jl, jm, "running", "unknown", "running", "unknown")
change_state_result(first_job, jl, jm, "pending", "unknown", "running", "unknown")
def test_transition_pending_retry_fail_stays_retry(first_job, jm, mock_log_parser):
def test_transition_pending_retry_fail_stays_retry(first_job, jm,
failure_classifications,
mock_log_parser):
jl = JobLoader()
change_state_result(first_job, jl, jm, "pending", "unknown", "pending", "unknown")
@@ -168,7 +179,8 @@ def test_transition_pending_retry_fail_stays_retry(first_job, jm, mock_log_parse
change_state_result(first_job, jl, jm, "completed", "fail", "completed", "retry")
def test_skip_unscheduled(first_job, jm, mock_log_parser):
def test_skip_unscheduled(first_job, jm, failure_classifications,
mock_log_parser):
jl = JobLoader()
first_job["state"] = "unscheduled"
jl.process_job_list([first_job])

View file

@@ -60,7 +60,7 @@ def mock_mozlog_get_log_handler(monkeypatch):
'get_log_handle', _get_log_handle)
def test_parse_log(jm, jobs_with_local_log, sample_resultset):
def test_parse_log(jm, failure_classifications, jobs_with_local_log, sample_resultset):
"""
check that 2 job_artifacts get inserted when running a parse_log task for
a successful job and that JobDetail objects get created
@@ -93,7 +93,8 @@ def test_parse_log(jm, jobs_with_local_log, sample_resultset):
print JobDetail.objects.count() == 4
def test_create_error_summary(jm, jobs_with_local_log, sample_resultset,
def test_create_error_summary(jm, failure_classifications,
jobs_with_local_log, sample_resultset,
test_repository):
"""
check that a bug suggestions artifact gets inserted when running

View file

@@ -47,15 +47,16 @@ def test_disconnect(jm):
assert not jm.get_dhub().connection["master_host"]["con_obj"].open
def test_ingest_single_sample_job(jm, sample_data,
sample_resultset, test_repository, mock_log_parser):
def test_ingest_single_sample_job(jm, failure_classifications, sample_data,
sample_resultset,
mock_log_parser):
"""Process a single job structure in the job_data.txt file"""
job_data = sample_data.job_data[:1]
test_utils.do_job_ingestion(jm, job_data, sample_resultset)
def test_ingest_all_sample_jobs(jm, sample_data,
sample_resultset, test_repository, mock_log_parser):
def test_ingest_all_sample_jobs(jm, failure_classifications, sample_data,
sample_resultset, mock_log_parser):
"""
Process each job structure in the job_data.txt file and verify.
"""
@@ -63,9 +64,10 @@ def test_ingest_all_sample_jobs(jm, sample_data,
test_utils.do_job_ingestion(jm, job_data, sample_resultset)
def test_ingest_twice_log_parsing_status_changed(jm, sample_data,
def test_ingest_twice_log_parsing_status_changed(jm,
failure_classifications,
sample_data,
sample_resultset,
test_repository,
mock_log_parser):
"""Process a single job twice, but change the log parsing status between,
verify that nothing changes"""
@@ -107,8 +109,9 @@ def test_insert_result_sets(jm, sample_resultset, test_repository):
@pytest.mark.parametrize("same_ingestion_cycle", [False, True])
def test_ingest_running_to_retry_sample_job(jm, sample_data,
sample_resultset, test_repository,
def test_ingest_running_to_retry_sample_job(jm, failure_classifications,
sample_data,
sample_resultset,
mock_log_parser,
same_ingestion_cycle):
"""Process a single job structure in the job_data.txt file"""
@@ -164,9 +167,10 @@ def test_ingest_running_to_retry_sample_job(jm, sample_data,
@pytest.mark.parametrize("ingestion_cycles", [[(0, 1), (1, 2), (2, 3)],
[(0, 2), (2, 3)],
[(0, 3)], [(0, 1), (1, 3)]])
def test_ingest_running_to_retry_to_success_sample_job(jm, sample_data,
def test_ingest_running_to_retry_to_success_sample_job(jm,
failure_classifications,
sample_data,
sample_resultset,
test_repository,
mock_log_parser,
ingestion_cycles):
# verifies that retries to success work, no matter how jobs are batched
@@ -207,7 +211,7 @@ def test_ingest_running_to_retry_to_success_sample_job(jm, sample_data,
[(0, 3), (3, 4)],
[(0, 2), (2, 4)]])
def test_ingest_running_to_retry_to_success_sample_job_multiple_retries(
jm, sample_data, sample_resultset, test_repository,
jm, failure_classifications, sample_data, sample_resultset,
mock_log_parser, ingestion_cycles):
# this verifies that if we ingest multiple retries:
# (1) nothing errors out
@@ -251,8 +255,10 @@ def test_ingest_running_to_retry_to_success_sample_job_multiple_retries(
assert set(Job.objects.values_list('id', flat=True)) == set([j['id'] for j in jl])
def test_ingest_retry_sample_job_no_running(jm, sample_data,
sample_resultset, test_repository, mock_log_parser):
def test_ingest_retry_sample_job_no_running(jm, test_repository,
failure_classifications,
sample_data, sample_resultset,
mock_log_parser):
"""Process a single job structure in the job_data.txt file"""
job_data = copy.deepcopy(sample_data.job_data[:1])
job = job_data[0]['job']
@@ -279,7 +285,8 @@ def test_ingest_retry_sample_job_no_running(jm, sample_data,
assert Job.objects.all()[0].guid == retry_guid
def test_calculate_durations(jm, test_repository, mock_log_parser):
def test_calculate_durations(jm, test_repository, failure_classifications,
mock_log_parser):
"""
Test the calculation of average job durations and their use during
subsequent job ingestion.
@@ -316,7 +323,7 @@ def test_calculate_durations(jm, test_repository, mock_log_parser):
assert durations[0].average_duration == expected_duration
def test_cycle_all_data(jm, sample_data,
def test_cycle_all_data(jm, failure_classifications, sample_data,
sample_resultset, test_repository, mock_log_parser,
failure_lines):
"""
@@ -359,7 +366,7 @@ def test_cycle_all_data(jm, sample_data,
assert TestFailureLine.search().params(search_type="count").execute().hits.total == 0
def test_cycle_one_job(jm, sample_data,
def test_cycle_one_job(jm, failure_classifications, sample_data,
sample_resultset, test_repository, mock_log_parser,
elasticsearch, failure_lines):
"""
@@ -435,7 +442,7 @@ def test_cycle_one_job(jm, sample_data,
assert set(int(item.meta.id) for item in TestFailureLine.search().execute()) == set(item.id for item in extra_objects["failure_lines"][1])
def test_cycle_all_data_in_chunks(jm, sample_data,
def test_cycle_all_data_in_chunks(jm, failure_classifications, sample_data,
sample_resultset, test_repository, mock_log_parser):
"""
Test cycling the sample data in chunks.
@@ -493,8 +500,9 @@ def test_cycle_task_set_meta(jm):
assert [item.id for item in TaskSetMeta.objects.all()] == [to_keep.id]
def test_cycle_job_model_reference_data(jm, sample_data, sample_resultset,
test_repository, mock_log_parser):
def test_cycle_job_model_reference_data(jm, failure_classifications,
sample_data, sample_resultset,
mock_log_parser):
job_data = sample_data.job_data[:20]
test_utils.do_job_ingestion(jm, job_data, sample_resultset, False)
@@ -522,7 +530,7 @@ def test_cycle_job_model_reference_data(jm, sample_data, sample_resultset,
assert Machine.objects.filter(id__in=original_machine_ids).count() == len(original_machine_ids)
def test_bad_date_value_ingestion(jm, test_repository, mock_log_parser):
def test_bad_date_value_ingestion(jm, failure_classifications, mock_log_parser):
"""
Test ingesting a blob with a bad date value
@@ -569,7 +577,7 @@ def test_store_result_set_data(jm, test_repository, sample_resultset):
comments=commit['comment'])
def test_get_job_data(jm, test_project, sample_data,
def test_get_job_data(jm, test_project, failure_classifications, sample_data,
sample_resultset, test_repository, mock_log_parser):
target_len = 10
@@ -582,8 +590,9 @@ def test_get_job_data(jm, test_project, sample_data,
assert len(job_data) is target_len
def test_remove_existing_jobs_single_existing(jm, sample_data,
sample_resultset, mock_log_parser):
def test_remove_existing_jobs_single_existing(jm, failure_classifications,
sample_data, sample_resultset,
mock_log_parser):
"""Remove single existing job prior to loading"""
job_data = sample_data.job_data[:1]
@@ -599,8 +608,10 @@ def test_remove_existing_jobs_single_existing(jm, sample_data,
assert Job.objects.count() == 1
def test_remove_existing_jobs_one_existing_one_new(jm, sample_data,
sample_resultset, mock_log_parser):
def test_remove_existing_jobs_one_existing_one_new(jm, failure_classifications,
sample_data,
sample_resultset,
mock_log_parser):
"""Remove single existing job prior to loading"""
job_data = sample_data.job_data[:1]
@@ -612,8 +623,10 @@ def test_remove_existing_jobs_one_existing_one_new(jm, sample_data,
assert Job.objects.count() == 1
def test_new_job_in_exclusion_profile(jm, sample_data, sample_resultset, mock_log_parser,
test_sheriff, test_project, result_set_stored):
def test_new_job_in_exclusion_profile(jm, failure_classifications, sample_data,
sample_resultset, mock_log_parser,
test_sheriff, test_project,
result_set_stored):
for job in sample_data.job_data[:2]:
job["revision"] = result_set_stored[0]["revision"]
@@ -647,7 +660,7 @@ def test_new_job_in_exclusion_profile(jm, sample_data, sample_resultset, mock_lo
assert lower_tier_signatures[0]['tier'] == 2
def test_ingesting_skip_existing(jm, sample_data,
def test_ingesting_skip_existing(jm, failure_classifications, sample_data,
sample_resultset, mock_log_parser):
"""Remove single existing job prior to loading"""
job_data = sample_data.job_data[:1]
@@ -660,7 +673,8 @@ def test_ingesting_skip_existing(jm, sample_data,
assert Job.objects.count() == 2
def test_ingest_job_with_updated_job_group(jm, sample_data, mock_log_parser,
def test_ingest_job_with_updated_job_group(jm, failure_classifications,
sample_data, mock_log_parser,
result_set_stored):
"""
When a job_type is associated with a job group on data ingestion,
@@ -697,7 +711,8 @@ def test_ingest_job_with_updated_job_group(jm, sample_data, mock_log_parser,
JobGroup.objects.get(name="second group name")
def test_ingest_job_with_revision_hash(jm, test_repository, sample_data,
def test_ingest_job_with_revision_hash(jm, test_repository,
failure_classifications, sample_data,
mock_log_parser, sample_resultset):
"""
Test ingesting a job with only a revision hash, no revision. And the
@@ -723,6 +738,7 @@ def test_ingest_job_with_revision_hash(jm, test_repository, sample_data,
def test_ingest_job_revision_and_revision_hash(jm, test_repository,
failure_classifications,
sample_data, mock_log_parser,
sample_resultset):
@@ -747,6 +763,7 @@ def test_ingest_job_revision_and_revision_hash(jm, test_repository,
def test_ingest_job_revision_hash_blank_revision(jm, test_repository,
failure_classifications,
sample_data, mock_log_parser,
sample_resultset):

View file

@@ -482,7 +482,8 @@ def test_list_similar_jobs(webapp, eleven_jobs_stored, jm):
assert len(similar_jobs['results']) == 3
def test_job_create(webapp, test_repository, test_user, eleven_job_blobs, monkeypatch, jm):
def test_job_create(webapp, test_repository, test_user, eleven_job_blobs,
failure_classifications, monkeypatch, jm):
monkeypatch.setattr(JobsViewSet, 'permission_classes', ())
url = reverse("jobs-list",

View file

@@ -419,8 +419,9 @@ def test_resultset_create(jm, test_repository, sample_resultset,
[rs['revision'] for rs in sample_resultset])
def test_resultset_cancel_all(jm, push_with_three_jobs,
pulse_action_consumer, test_user):
def test_resultset_cancel_all(jm, failure_classifications,
push_with_three_jobs, pulse_action_consumer,
test_user):
"""
Issue cancellation of a resultset with three unfinished jobs.
"""
@@ -454,9 +455,8 @@ def test_resultset_status(webapp, test_job, test_user):
"""
test retrieving the status of a resultset
"""
# create a failure classification corresponding to "not successful"
failure_classification = FailureClassification.objects.create(
id=2, name="fixed by commit")
failure_classification = FailureClassification.objects.get(
name="fixed by commit")
push = test_job.push

View file

@@ -13,6 +13,7 @@ from treeherder.model.models import (BuildPlatform,
Commit,
Datasource,
ExclusionProfile,
FailureClassification,
FailureLine,
Job,
JobDuration,
@@ -243,6 +244,9 @@ class JobsModel(TreeherderModelBase):
placeholders=[state, job_id],
debug_show=self.DEBUG
)
Job.objects.filter(repository__name=self.project,
project_specific_id=job_id).update(
state=state)
def get_incomplete_job_ids(self, push_id):
"""Get list of ids for jobs of a push that are not in complete state."""
@@ -265,6 +269,11 @@
placeholders=[push_id],
debug_show=self.DEBUG
)
Job.objects.filter(repository__name=self.project,
push_id=push_id,
state='pending').update(
state='completed',
result='usercancel')
# Sending 'cancel_all' action to pulse. Right now there is no listener
# for this, so we cannot remove 'cancel' action for each job below.
@@ -345,6 +354,11 @@
placeholders=[job['job_guid']],
debug_show=self.DEBUG
)
Job.objects.filter(repository__name=self.project,
project_specific_id=job['id'],
state='pending').update(
state='completed',
result='usercancel')
def get_max_job_id(self):
"""Get the maximum job id."""
@@ -376,6 +390,9 @@
],
debug_show=self.DEBUG
)
Job.objects.filter(repository__name=self.project,
project_specific_id=job_id).update(
failure_classification_id=failure_classification_id)
def calculate_durations(self, sample_window_seconds, debug):
# Get the most recent timestamp from jobs
@@ -656,6 +673,9 @@ into chunks of chunk_size size. Returns the number of result sets deleted"""
debug_show=self.DEBUG,
placeholders=coalesced_job_guid_placeholders,
executemany=True)
for (coalesced_to_guid, job_guid) in coalesced_job_guid_placeholders:
Job.objects.filter(guid=job_guid).update(
coalesced_to_guid=coalesced_to_guid)
def _remove_existing_jobs(self, data):
"""
@@ -804,6 +824,9 @@ into chunks of chunk_size size. Returns the number of result sets deleted"""
reference_data_name = job_datum.get('reference_data_name', None)
default_failure_classification = FailureClassification.objects.get(
name='not classified')
sh = sha1()
sh.update(''.join(
map(lambda x: str(x),
@@ -943,6 +966,26 @@ into chunks of chunk_size size. Returns the number of result sets deleted"""
repository=Repository.objects.get(name=self.project),
project_specific_id=ds_job_id,
defaults={
'signature': signature,
'build_platform': build_platform,
'machine_platform': machine_platform,
'machine': machine,
'option_collection_hash': option_collection_hash,
'job_type': job_type,
'product': product,
'failure_classification': default_failure_classification,
'who': who,
'reason': reason,
'result': result,
'state': state,
'tier': tier,
'last_modified': datetime.now(),
'submit_time': datetime.fromtimestamp(
self.get_number(job_datum.get('submit_timestamp'))),
'start_time': datetime.fromtimestamp(
self.get_number(job_datum.get('start_timestamp'))),
'end_time': datetime.fromtimestamp(
self.get_number(job_datum.get('end_timestamp'))),
'guid': job_guid,
'push_id': push_id
})
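
The defaults dict above is keyed on (repository, project_specific_id); the enclosing call is cut off in this view, but it follows Django's update_or_create upsert idiom, sketched here with hypothetical values:

from datetime import datetime

from treeherder.model.models import Job, Repository

job, _created = Job.objects.update_or_create(
    repository=Repository.objects.get(name='test_treeherder'),  # hypothetical project
    project_specific_id=1234,  # the legacy per-project job id
    defaults={
        'state': 'completed',
        'result': 'success',
        'last_modified': datetime.now(),
        'guid': 'd22c74d4aa6d2a1dcba96d95dccbd5fdca70cf33',  # example guid reused from the tests above
    })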

View file

@@ -0,0 +1,118 @@
import datetime
import MySQLdb
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import (connection,
transaction)
from treeherder.model.models import (Datasource,
Job,
ReferenceDataSignatures,
Repository)
class Command(BaseCommand):
help = 'Migrate per-project job metadata to master database'
def handle(self, *args, **options):
for ds in Datasource.objects.all():
self.stdout.write('{}\n'.format(ds.project))
try:
repository = Repository.objects.get(name=ds.project)
except:
self.stderr.write('No repository for datasource project {}, skipping\n'.format(
ds.project))
continue
db_options = settings.DATABASES['default'].get('OPTIONS', {})
db = MySQLdb.connect(
host=settings.DATABASES['default']['HOST'],
db=ds.name,
user=settings.DATABASES['default']['USER'],
passwd=settings.DATABASES['default'].get('PASSWORD') or '',
**db_options
)
c = db.cursor()
treeherder_c = connection.cursor()
signature_id_map = {}
min_job_id = 0
while True:
job_ids_to_migrate = Job.objects.filter(
repository=repository, machine=None,
project_specific_id__gt=min_job_id).values_list(
'project_specific_id', flat=True).order_by(
'project_specific_id')[:1000]
if not job_ids_to_migrate:
# done for this project!
break
c.execute("""select id, signature, job_coalesced_to_guid, build_platform_id, machine_platform_id, machine_id, option_collection_hash, job_type_id, product_id, failure_classification_id, who, reason, result, state, submit_timestamp, start_timestamp, end_timestamp, last_modified, running_eta, tier from job where id in ({})""".format(
','.join([str(job_id) for job_id in job_ids_to_migrate])))
with transaction.atomic():
for (id, signature, job_coalesced_to_guid, build_platform_id, machine_platform_id,
machine_id, option_collection_hash, job_type_id, product_id, failure_classification_id,
who, reason, result, state, submit_timestamp, start_timestamp, end_timestamp, last_modified,
running_eta, tier) in c.fetchall():
signature_id = signature_id_map.get(signature)
if not signature_id:
try:
signature_id = ReferenceDataSignatures.objects.values_list('id', flat=True).get(
repository=repository.name, signature=signature)
except ReferenceDataSignatures.DoesNotExist:
print "WARNING: non-existing refdata signature: {}".format(signature)
continue
signature_id_map[signature] = signature_id
treeherder_c.execute('''
update job set
signature_id=%s,
coalesced_to_guid=%s,
build_platform_id=%s,
machine_platform_id=%s,
machine_id=%s,
option_collection_hash=%s,
job_type_id=%s,
product_id=%s,
failure_classification_id=%s,
who=%s,
reason=%s,
result=%s,
state=%s,
submit_time=%s,
start_time=%s,
end_time=%s,
last_modified=%s,
running_eta=%s,
tier=%s
where repository_id=%s and project_specific_id=%s
''', [
signature_id,
job_coalesced_to_guid,
build_platform_id,
machine_platform_id,
machine_id,
option_collection_hash,
job_type_id,
product_id,
failure_classification_id,
who,
reason,
result,
state,
datetime.datetime.fromtimestamp(
submit_timestamp),
datetime.datetime.fromtimestamp(
start_timestamp),
datetime.datetime.fromtimestamp(
end_timestamp),
last_modified,
running_eta,
tier,
repository.id,
id])
print '.',
min_job_id = job_ids_to_migrate[len(job_ids_to_migrate) - 1]
treeherder_c.close()
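
The command above walks each project's jobs in batches of 1000 using keyset pagination: each pass filters on project_specific_id greater than the last id handled rather than using OFFSET, which would slow down as the table is traversed. A sketch of that idiom in isolation (queryset is any queryset exposing a project_specific_id column):

def id_batches(queryset, batch_size=1000):
    """Yield ascending batches of project_specific_id values."""
    last_id = 0
    while True:
        ids = list(queryset.filter(project_specific_id__gt=last_id)
                           .order_by('project_specific_id')
                           .values_list('project_specific_id', flat=True)[:batch_size])
        if not ids:
            # no rows past the last id: this project is done
            break
        yield ids
        last_id = ids[-1]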

View file

@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('model', '0040_push_and_commit_orm_2'),
]
operations = [
migrations.AddField(
model_name='job',
name='build_platform',
field=models.ForeignKey(default=None, to='model.BuildPlatform', null=True),
),
migrations.AddField(
model_name='job',
name='coalesced_to_guid',
field=models.CharField(default=None, max_length=50, null=True),
),
migrations.AddField(
model_name='job',
name='end_time',
field=models.DateTimeField(default=None, null=True),
),
migrations.AddField(
model_name='job',
name='failure_classification',
field=models.ForeignKey(default=None, to='model.FailureClassification', null=True),
),
migrations.AddField(
model_name='job',
name='job_type',
field=models.ForeignKey(default=None, to='model.JobType', null=True),
),
migrations.AddField(
model_name='job',
name='last_modified',
field=models.DateTimeField(default=None, null=True),
),
migrations.AddField(
model_name='job',
name='machine',
field=models.ForeignKey(default=None, to='model.Machine', null=True),
),
migrations.AddField(
model_name='job',
name='machine_platform',
field=models.ForeignKey(default=None, to='model.MachinePlatform', null=True),
),
migrations.AddField(
model_name='job',
name='option_collection_hash',
field=models.CharField(default=None, max_length=64, null=True),
),
migrations.AddField(
model_name='job',
name='product',
field=models.ForeignKey(default=None, to='model.Product', null=True),
),
migrations.AddField(
model_name='job',
name='reason',
field=models.CharField(default=None, max_length=125, null=True),
),
migrations.AddField(
model_name='job',
name='result',
field=models.CharField(default=None, max_length=25, null=True),
),
migrations.AddField(
model_name='job',
name='running_eta',
field=models.PositiveIntegerField(default=None, null=True),
),
migrations.AddField(
model_name='job',
name='signature',
field=models.ForeignKey(default=None, to='model.ReferenceDataSignatures', null=True),
),
migrations.AddField(
model_name='job',
name='start_time',
field=models.DateTimeField(default=None, null=True),
),
migrations.AddField(
model_name='job',
name='state',
field=models.CharField(default=None, max_length=25, null=True),
),
migrations.AddField(
model_name='job',
name='submit_time',
field=models.DateTimeField(default=None, null=True),
),
migrations.AddField(
model_name='job',
name='tier',
field=models.PositiveIntegerField(default=None, null=True),
),
migrations.AddField(
model_name='job',
name='who',
field=models.CharField(default=None, max_length=50, null=True),
),
]

View file

@@ -662,11 +662,7 @@ class JobDuration(models.Model):
class Job(models.Model):
"""
Representation of a treeherder job
This is currently a transitional representation intended to assist in
cross referencing data between the per-project databases and those
objects in the Django ORM
This class represents a build or test job in Treeherder
"""
id = BigAutoField(primary_key=True)
repository = models.ForeignKey(Repository)
@@ -676,6 +672,33 @@
# https://bugzilla.mozilla.org/show_bug.cgi?id=1265503
project_specific_id = models.PositiveIntegerField(db_index=True)
coalesced_to_guid = models.CharField(max_length=50, null=True,
default=None)
signature = models.ForeignKey(ReferenceDataSignatures, null=True,
default=None)
build_platform = models.ForeignKey(BuildPlatform, null=True,
default=None)
machine_platform = models.ForeignKey(MachinePlatform, null=True,
default=None)
machine = models.ForeignKey(Machine, null=True, default=None)
option_collection_hash = models.CharField(max_length=64, null=True,
default=None)
job_type = models.ForeignKey(JobType, null=True, default=None)
product = models.ForeignKey(Product, null=True, default=None)
failure_classification = models.ForeignKey(FailureClassification,
null=True, default=None)
who = models.CharField(max_length=50, null=True, default=None)
reason = models.CharField(max_length=125, null=True, default=None)
result = models.CharField(max_length=25, null=True, default=None)
state = models.CharField(max_length=25, null=True, default=None)
submit_time = models.DateTimeField(null=True, default=None)
start_time = models.DateTimeField(null=True, default=None)
end_time = models.DateTimeField(null=True, default=None)
last_modified = models.DateTimeField(null=True, default=None)
running_eta = models.PositiveIntegerField(null=True, default=None)
tier = models.PositiveIntegerField(null=True, default=None)
push = models.ForeignKey(Push)
class Meta:
@@ -686,6 +709,10 @@
return "{0} {1} {2} {3}".format(self.id, self.repository, self.guid,
self.project_specific_id)
def save(self, *args, **kwargs):
self.last_modified = datetime.datetime.now()
super(Job, self).save(*args, **kwargs)
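
The save() override stamps last_modified on every write, presumably so consumers can poll for recently changed jobs. With the metadata denormalized onto Job, queries that previously required the per-project datasource can be expressed against the ORM; a sketch using the fields added above (the filter values are hypothetical):

from treeherder.model.models import Job

recent_failures = (Job.objects
                   .filter(state='completed', result='testfailed', tier__lte=2)
                   .select_related('job_type', 'machine_platform')
                   .order_by('-start_time')[:10])
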
def is_fully_autoclassified(self):
"""
Returns whether a job is fully autoclassified (i.e. we have