mirror of https://github.com/mozilla/treeherder.git

base model generic to datasource now

Parent: a025220123
Commit: 1290f6c646
@@ -102,8 +102,8 @@ def jm():
     """ Give a test access to a JobsModel instance. """
     from django.conf import settings
     from treeherder.model.derived.jobs import JobsModel
-    return JobsModel(settings.DATABASES["default"]["TEST_NAME"])
+    return JobsModel.create(settings.DATABASES["default"]["TEST_NAME"])
+    # return JobsModel(settings.DATABASES["default"]["TEST_NAME"])


 @pytest.fixture(scope='session')
 def jobs_ds():
@@ -113,7 +113,7 @@ def jobs_ds():
         project=settings.DATABASES["default"]["TEST_NAME"],
         dataset=1,
         contenttype="jobs",
-        host="localhost",
+        host=settings.DATABASES['default']['HOST'],
     )
@@ -125,5 +125,8 @@ def objectstore_ds():
         project=settings.DATABASES["default"]["TEST_NAME"],
         dataset=1,
         contenttype="objectstore",
-        host="localhost",
+        host=settings.DATABASES['default']['HOST'],
     )
+
+
+#ARE WE SURE LOCALHOST IS RIGHT HERE, FOR THE VAGRANT VM?
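The fixtures now read the database host from Django settings instead of hard-coding "localhost"; a minimal sketch of the settings side this relies on (the environment-variable name is illustrative, not part of this commit):

    # settings.py (sketch) -- the fixtures above read DATABASES['default']['HOST'],
    # so pointing it at the Vagrant VM's MySQL answers the trailing comment's
    # question without touching the test code.
    import os

    DATABASES = {
        "default": {
            "ENGINE": "django.db.backends.mysql",
            "NAME": "treeherder",
            "HOST": os.environ.get("TREEHERDER_DATABASE_HOST", "localhost"),
        }
    }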
@@ -55,4 +55,3 @@ def test_job_group_manager(refdata):
     assert row_data["name"] == 'mygroup'
     assert row_data["description"] == 'fill me'
     assert row_data["active_status"] == 'active'
-
@@ -23,10 +23,11 @@ def xtest_disconnect(jm):
     assert src.dhub.connection["master_host"]["con_obj"].open is False


-def test_claim_objects(objectstore_ds, jm):
+def test_claim_objects(objectstore_ds, jobs_ds, jm):
     """``claim_objects`` claims & returns unclaimed rows up to a limit."""

+    # s = objectstore_ds
+    os = objectstore_ds
+    j = jobs_ds

     blobs = [
         job_json(testrun={"date": "1330454755"}),
@@ -59,7 +60,7 @@ def test_claim_objects(objectstore_ds, jm):
     assert loading_rows == 3


-def xtest_mark_object_complete(jobs_ds, jm):
+def test_mark_object_complete(jobs_ds, jm):
     """Marks claimed row complete and records run id."""
     jm.store_job_data(job_json())
     row_id = jm.claim_objects(1)[0]["id"]
@@ -81,7 +82,7 @@ def xtest_process_objects(jobs_ds, jm):
         job_json(testrun={"date": "1330454755"}),
         job_json(testrun={"date": "1330454756"}),
         job_json(testrun={"date": "1330454757"}),
-        ]
+    ]

     for blob in blobs:
         jm.store_job_data(blob)
@@ -5,31 +5,72 @@ access.
 """
 from django.conf import settings

-from treeherder.model.sql.sql_datasource import SQLDataSource
+from treeherder.model.models import Datasource


 class TreeherderModelBase(object):
-    """Base model class for all TreeHerder models"""
+    """
+    Base model class for all derived models
+
+    """

     CONTENT_TYPES = []

     def __init__(self, project):
         """Encapsulate the dataset access for this ``project`` """

         self.project = project

         self.sources = {}
-        for ct in self.CONTENT_TYPES:
-            self.sources[ct] = SQLDataSource(project, ct)
+        self.dhubs = {}

         self.DEBUG = settings.DEBUG

     def __unicode__(self):
         """Unicode representation is project name."""
         return self.project

     def disconnect(self):
         """Iterate over and disconnect all data sources."""
         for src in self.sources.itervalues():
             src.disconnect()

-    def get_project_cache_key(self, str_data):
-        return "{0}_{1}".format(self.project, str_data)
+    def get_dhub(self, contenttype, procs_file_name=None):
+        """
+        The configured datahub for the given contenttype
+
+        """
+        if not procs_file_name:
+            procs_file_name = "{0}.json".format(contenttype)
+
+        if not contenttype in self.dhubs.keys():
+            self.dhubs[contenttype] = self.get_datasource(
+                contenttype).dhub(procs_file_name)
+
+        return self.dhubs[contenttype]
+
+    def get_datasource(self, contenttype):
+        """The datasource for this contenttype of the project."""
+
+        if not contenttype in self.sources.keys():
+            self.sources[contenttype] = self._get_datasource(contenttype)
+            # self.sources[contenttype] = Datasource.objects.get(
+            #     project = self.project,
+            #     contenttype = contenttype,
+            #     dataset = 1,
+            # )
+
+        return self.sources[contenttype]
+
+    def _get_datasource(self, contenttype):
+        """Find the datasource for this contenttype in the cache."""
+        candidate_sources = []
+        for source in Datasource.objects.cached():
+            if (source.project == self.project and
+                    source.contenttype == contenttype):
+                candidate_sources.append(source)
+
+        if not candidate_sources:
+            raise DatasetNotFoundError(
+                "No dataset found for project %r, contenttype %r."
+                % (self.project, contenttype)
+            )
+
+        candidate_sources.sort(key=lambda s: s.dataset, reverse=True)
+
+        return candidate_sources[0]
+
+
+class DatasetNotFoundError(ValueError):
+    pass
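To illustrate the new lazy lookup in TreeherderModelBase: ``sources`` and ``dhubs`` start empty, and a Datasource is resolved from the cached queryset only when a dhub is first requested. A sketch (the subclass and project name are hypothetical, not from this commit):

    from treeherder.model.derived.base import TreeherderModelBase

    class ExampleModel(TreeherderModelBase):
        CONTENT_TYPES = ["jobs"]  # hypothetical; JobsModel below is the real consumer

    model = ExampleModel("myproject")
    dhub = model.get_dhub("jobs")          # resolves the Datasource, builds the hub
    assert model.get_dhub("jobs") is dhub  # memoized in self.dhubs thereafter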
@@ -26,47 +26,56 @@ class JobsModel(TreeherderModelBase):
     CT_OBJECTSTORE = "objectstore"
     CONTENT_TYPES = [CT_JOBS, CT_OBJECTSTORE]

-    # @classmethod
-    # def create(cls, project, hosts=None, types=None):
-    #     """
-    #     Create all the datasource tables for this project.
-    #
-    #     ``hosts`` is an optional dictionary mapping contenttype names to the
-    #     database server host on which the database for that contenttype should
-    #     be created. Not all contenttypes need to be represented; any that
-    #     aren't will use the default (``TREEHERDER_DATABASE_HOST``).
-    #
-    #     ``types`` is an optional dictionary mapping contenttype names to the
-    #     type of database that should be created. For MySQL/MariaDB databases,
-    #     use "MySQL-Engine", where "Engine" could be "InnoDB", "Aria", etc. Not
-    #     all contenttypes need to be represented; any that aren't will use the
-    #     default (``MySQL-InnoDB``).
-    #
-    #
-    #     """
-    #     hosts = hosts or {}
-    #     types = types or {}
-    #
-    #     for ct in cls.CONTENT_TYPES:
-    #         Datasource.create(
-    #             project,
-    #             ct,
-    #             host=hosts.get(ct),
-    #             db_type=types.get(ct),
-    #         )
-    #
-    #     return cls(project=project)
+    @classmethod
+    def create(cls, project, hosts=None, types=None):
+        """
+        Create all the datasource tables for this project.
+
+        ``hosts`` is an optional dictionary mapping contenttype names to the
+        database server host on which the database for that contenttype should
+        be created. Not all contenttypes need to be represented; any that
+        aren't will use the default (``TREEHERDER_DATABASE_HOST``).
+
+        ``types`` is an optional dictionary mapping contenttype names to the
+        type of database that should be created. For MySQL/MariaDB databases,
+        use "MySQL-Engine", where "Engine" could be "InnoDB", "Aria", etc. Not
+        all contenttypes need to be represented; any that aren't will use the
+        default (``MySQL-InnoDB``).
+
+
+        """
+        hosts = hosts or {}
+        types = types or {}
+
+        for ct in [cls.CT_JOBS, cls.CT_OBJECTSTORE]:
+            dataset = Datasource.get_latest_dataset(project, ct)
+            source = Datasource(
+                project=project,
+                contenttype=ct,
+                dataset=dataset or 1,
+            )
+            source.save()
+
+        return cls(project=project)

+    def get_jobs_dhub(self):
+        """Get the dhub for jobs"""
+        return self.get_dhub(self.CT_JOBS)
+
+    def get_os_dhub(self):
+        """Get the dhub for the objectstore"""
+        return self.get_dhub(self.CT_OBJECTSTORE)

     def get_oauth_consumer_secret(self, key):
-        ds = self.sources[self.CT_OBJECTSTORE].datasource
+        ds = self.get_datasource(self.CT_OBJECTSTORE)
         secret = ds.get_oauth_consumer_secret(key)
         return secret

-    def _get_last_insert_id(self, source=None):
+    def _get_last_insert_id(self, contenttype=None):
         """Return last-inserted ID."""
-        if not source:
-            source = self.CT_JOBS
-        return self.sources[source].dhub.execute(
+        if not contenttype:
+            contenttype = self.CT_JOBS
+        return self.get_dhub(contenttype).execute(
             proc='generic.selects.get_last_insert_id',
             debug_show=self.DEBUG,
             return_type='iter',
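A usage sketch for the now-active classmethod (project name hypothetical). Note that in this commit the body only saves a bare Datasource row per contenttype; the ``hosts``/``types`` overrides described in the docstring are accepted but not yet consumed:

    from treeherder.model.derived.jobs import JobsModel

    jm = JobsModel.create("myproject")  # saves a Datasource row for CT_JOBS and
                                        # CT_OBJECTSTORE, then returns the model
    jobs_hub = jm.get_jobs_dhub()       # dhubs resolve lazily via the base class
    os_hub = jm.get_os_dhub()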
@@ -79,7 +88,7 @@ class JobsModel(TreeherderModelBase):
         error = "N" if error is None else "Y"
         error_msg = error or ""

-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc='objectstore.inserts.store_json',
             placeholders=[loaded_timestamp, json_data, error, error_msg],
             debug_show=self.DEBUG
@@ -98,7 +107,7 @@ class JobsModel(TreeherderModelBase):

         """
         proc = "objectstore.selects.get_unprocessed"
-        json_blobs = self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        json_blobs = self.get_os_dhub().execute(
             proc=proc,
             placeholders=[limit],
             debug_show=self.DEBUG,
@@ -274,7 +283,7 @@ class JobsModel(TreeherderModelBase):
         return job_id

     def _insert_data(self, statement, placeholders, executemany=False):
-        self.sources[self.CT_JOBS].dhub.execute(
+        self.get_jobs_dhub().execute(
             proc='jobs.inserts.' + statement,
             debug_show=self.DEBUG,
             placeholders=placeholders,
@@ -288,7 +297,7 @@ class JobsModel(TreeherderModelBase):

     def _get_last_insert_id(self, source=CT_JOBS):
         """Return last-inserted ID."""
-        return self.sources[source].dhub.execute(
+        return self.get_dhub(source).execute(
             proc='generic.selects.get_last_insert_id',
             debug_show=self.DEBUG,
             return_type='iter',
@@ -360,17 +369,17 @@ class JobsModel(TreeherderModelBase):
         # Note: this claims rows for processing. Failure to call load_job_data
         # on this data will result in some json blobs being stuck in limbo
         # until another worker comes along with the same connection ID.
-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc=proc_mark,
             placeholders=[limit],
             debug_show=self.DEBUG,
         )

         resetwarnings()

         # Return all JSON blobs claimed by this connection ID (could possibly
         # include orphaned rows from a previous run).
-        json_blobs = self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        json_blobs = self.get_os_dhub().execute(
             proc=proc_get,
             debug_show=self.DEBUG,
             return_type='tuple'
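The note above describes a claim-then-process protocol: rows claimed under a connection ID stay claimed until they are processed or a worker with the same ID returns. A sketch of the intended loop, using only methods touched in this commit (the loader call and row keys are assumptions based on the surrounding code):

    def process_claimed(jm, limit=100):
        # Hypothetical worker loop; claim_objects marks rows, then returns them.
        for row in jm.claim_objects(limit):
            try:
                job_id = jm.load_job_data(row["json_blob"])  # assumed signature
                jm.mark_object_complete(row["id"], job_id)
            except ValueError as e:
                jm.mark_object_error(row["id"], str(e))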
@@ -380,7 +389,7 @@ class JobsModel(TreeherderModelBase):

     def mark_object_complete(self, object_id, job_id):
         """ Call to database to mark the task completed """
-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc="objectstore.updates.mark_complete",
             placeholders=[job_id, object_id],
             debug_show=self.DEBUG
@@ -388,7 +397,7 @@ class JobsModel(TreeherderModelBase):

     def mark_object_error(self, object_id, error):
         """ Call to database to mark the task completed """
-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc="objectstore.updates.mark_error",
             placeholders=[error, object_id],
             debug_show=self.DEBUG
@@ -2,7 +2,6 @@ import os
 from django.conf import settings
 from datasource.bases.BaseHub import BaseHub
 from datasource.DataHub import DataHub
-from .base import TreeherderModelBase


 class RefDataManager(object):
@@ -9,6 +9,7 @@ from datasource.hubs.MySQL import MySQL
 from django.conf import settings
 from django.core.cache import cache
 from django.db import models
+from django.db.models import Max

 from treeherder import path
@@ -157,13 +158,26 @@ class MachineNote(models.Model):

 class DatasourceManager(models.Manager):
     def cached(self):
-        """Return all datasources, caching the results."""
+        """
+        Return all datasources, caching the results.
+
+        """
         sources = cache.get(SOURCES_CACHE_KEY)
         if not sources:
             sources = list(self.all())
             cache.set(SOURCES_CACHE_KEY, sources)
         return sources

+    def latest(self, project, contenttype):
+        """
+        @@@ TODO: this needs to use the cache, probably
+        """
+        ds = Datasource.get_latest_dataset(project, contenttype)
+        return self.get(
+            project=project,
+            contenttype=contenttype,
+            dataset=ds)
+

 class Datasource(models.Model):
     id = models.IntegerField(primary_key=True)
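For illustration, the new ``latest()`` combines the ``Max("dataset")`` aggregate (added in the next hunk) with the unique (project, contenttype, dataset) key; per the @@@ TODO it still hits the database rather than the cache. Spelled out with a hypothetical project name:

    n = Datasource.get_latest_dataset("myproject", "jobs")   # Max("dataset")
    ds = Datasource.objects.get(
        project="myproject", contenttype="jobs", dataset=n)
    # ...which is what Datasource.objects.latest("myproject", "jobs") returns.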
@@ -192,6 +206,14 @@ class Datasource(models.Model):
         cache.delete(SOURCES_CACHE_KEY)
         cls.objects.cached()

+    @classmethod
+    def get_latest_dataset(cls, project, contenttype):
+        """get the latest dataset"""
+        return cls.objects.filter(
+            project=project,
+            contenttype=contenttype,
+        ).aggregate(Max("dataset"))["dataset__max"]
+
     @property
     def key(self):
         """Unique key for a data source is the project, contenttype, dataset."""
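Worth noting: ``aggregate(Max(...))`` returns ``None`` when no rows match, which is why ``JobsModel.create`` above guards with ``dataset or 1``. A sketch:

    # No datasources exist yet for this project/contenttype:
    Datasource.get_latest_dataset("new-project", "jobs")  # -> None

    # Hence, in JobsModel.create:
    dataset = Datasource.get_latest_dataset(project, ct)
    source = Datasource(project=project, contenttype=ct, dataset=dataset or 1)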
@@ -202,6 +224,31 @@ class Datasource(models.Model):
         """Unicode representation is the project's unique key."""
         return unicode(self.key)

+    def create_next_dataset(self, schema_file=None):
+        """
+        Create and return the next dataset for this project/contenttype.
+
+        The database for the new dataset will be located on the same host.
+
+        """
+        dataset = Datasource.objects.filter(
+            project=self.project,
+            contenttype=self.contenttype
+        ).order_by("-dataset")[0].dataset + 1
+
+        # @@@ should we store the schema file name used for the previous
+        # dataset in the db and use the same one again automatically? or should
+        # we actually copy the schema of an existing dataset rather than using
+        # a schema file at all?
+        return Datasource.objects.create(
+            project=self.project,
+            contenttype=self.contenttype,
+            dataset=dataset,
+            host=self.datasource.host,
+            db_type=self.datasource.type,
+            schema_file=schema_file,
+        )
+
     def save(self, *args, **kwargs):
         inserting = not self.pk
         # in case you want to add a new datasource and provide
@@ -2,7 +2,7 @@
     "inserts":{
         "store_json":{

-            "sql":"INSERT INTO `objectstore` (`date_timestamp`,
+            "sql":"INSERT INTO `objectstore` (`loaded_timestamp`,
                                               `json_blob`,
                                               `error`,
                                               `error_msg`)