mirror of https://github.com/mozilla/treeherder.git

base model generic to datasource now

Parent: a025220123
Commit: 1290f6c646
@@ -102,8 +102,8 @@ def jm():
     """ Give a test access to a JobsModel instance. """
     from django.conf import settings
     from treeherder.model.derived.jobs import JobsModel
-    return JobsModel(settings.DATABASES["default"]["TEST_NAME"])
+    return JobsModel.create(settings.DATABASES["default"]["TEST_NAME"])
+    # return JobsModel(settings.DATABASES["default"]["TEST_NAME"])


 @pytest.fixture(scope='session')
 def jobs_ds():
@@ -113,7 +113,7 @@ def jobs_ds():
         project=settings.DATABASES["default"]["TEST_NAME"],
         dataset=1,
         contenttype="jobs",
-        host="localhost",
+        host=settings.DATABASES['default']['HOST'],
     )
@@ -125,5 +125,8 @@ def objectstore_ds():
         project=settings.DATABASES["default"]["TEST_NAME"],
         dataset=1,
         contenttype="objectstore",
-        host="localhost",
+        host=settings.DATABASES['default']['HOST'],
     )
+
+
+#ARE WE SURE LOCALHOST IS RIGHT HERE, FOR THE VAGRANT VM?
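The fixtures now read the database host from Django settings instead of hard-coding "localhost"; a minimal sketch of the settings side this relies on (the environment-variable name is illustrative, not part of this commit):

    # settings.py (sketch) -- the fixtures above read DATABASES['default']['HOST'],
    # so pointing it at the Vagrant VM's MySQL answers the trailing comment's
    # question without touching the test code.
    import os

    DATABASES = {
        "default": {
            "ENGINE": "django.db.backends.mysql",
            "NAME": "treeherder",
            "HOST": os.environ.get("TREEHERDER_DATABASE_HOST", "localhost"),
        }
    }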
@@ -55,4 +55,3 @@ def test_job_group_manager(refdata):
     assert row_data["name"] == 'mygroup'
     assert row_data["description"] == 'fill me'
     assert row_data["active_status"] == 'active'
-
@@ -23,10 +23,11 @@ def xtest_disconnect(jm):
     assert src.dhub.connection["master_host"]["con_obj"].open is False


-def test_claim_objects(objectstore_ds, jm):
+def test_claim_objects(objectstore_ds, jobs_ds, jm):
     """``claim_objects`` claims & returns unclaimed rows up to a limit."""

+    # s = objectstore_ds
+    os = objectstore_ds
+    j = jobs_ds

     blobs = [
         job_json(testrun={"date": "1330454755"}),
@@ -59,7 +60,7 @@ def test_claim_objects(objectstore_ds, jm):
     assert loading_rows == 3


-def xtest_mark_object_complete(jobs_ds, jm):
+def test_mark_object_complete(jobs_ds, jm):
     """Marks claimed row complete and records run id."""
     jm.store_job_data(job_json())
     row_id = jm.claim_objects(1)[0]["id"]
@@ -81,7 +82,7 @@ def xtest_process_objects(jobs_ds, jm):
         job_json(testrun={"date": "1330454755"}),
         job_json(testrun={"date": "1330454756"}),
         job_json(testrun={"date": "1330454757"}),
-        ]
+    ]

     for blob in blobs:
         jm.store_job_data(blob)
@@ -5,31 +5,72 @@ access.
 """
 from django.conf import settings

-from treeherder.model.sql.sql_datasource import SQLDataSource
+from treeherder.model.models import Datasource


 class TreeherderModelBase(object):
-    """Base model class for all TreeHerder models"""
+    """
+    Base model class for all derived models
+
+    """

     CONTENT_TYPES = []

     def __init__(self, project):
         """Encapsulate the dataset access for this ``project`` """

         self.project = project

         self.sources = {}
-        for ct in self.CONTENT_TYPES:
-            self.sources[ct] = SQLDataSource(project, ct)
+        self.dhubs = {}

         self.DEBUG = settings.DEBUG

     def __unicode__(self):
         """Unicode representation is project name."""
         return self.project

     def disconnect(self):
         """Iterate over and disconnect all data sources."""
         for src in self.sources.itervalues():
             src.disconnect()

-    def get_project_cache_key(self, str_data):
-        return "{0}_{1}".format(self.project, str_data)
+    def get_dhub(self, contenttype, procs_file_name=None):
+        """
+        The configured datahub for the given contenttype
+
+        """
+        if not procs_file_name:
+            procs_file_name = "{0}.json".format(contenttype)
+
+        if not contenttype in self.dhubs.keys():
+            self.dhubs[contenttype] = self.get_datasource(
+                contenttype).dhub(procs_file_name)
+
+        return self.dhubs[contenttype]
+
+    def get_datasource(self, contenttype):
+        """The datasource for this contenttype of the project."""
+
+        if not contenttype in self.sources.keys():
+            self.sources[contenttype] = self._get_datasource(contenttype)
+            # self.sources[contenttype] = Datasource.objects.get(
+            #     project = self.project,
+            #     contenttype = contenttype,
+            #     dataset = 1,
+            # )
+
+        return self.sources[contenttype]
+
+    def _get_datasource(self, contenttype):
+        """Find the datasource for this contenttype in the cache."""
+        candidate_sources = []
+        for source in Datasource.objects.cached():
+            if (source.project == self.project and
+                    source.contenttype == contenttype):
+                candidate_sources.append(source)
+
+        if not candidate_sources:
+            raise DatasetNotFoundError(
+                "No dataset found for project %r, contenttype %r."
+                % (self.project, contenttype)
+            )
+
+        candidate_sources.sort(key=lambda s: s.dataset, reverse=True)
+
+        return candidate_sources[0]
+
+
+class DatasetNotFoundError(ValueError):
+    pass
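To illustrate the new lazy lookup in TreeherderModelBase: ``sources`` and ``dhubs`` start empty, and a Datasource is resolved from the cached queryset only when a dhub is first requested. A sketch (the subclass and project name are hypothetical, not from this commit):

    from treeherder.model.derived.base import TreeherderModelBase

    class ExampleModel(TreeherderModelBase):
        CONTENT_TYPES = ["jobs"]  # hypothetical; JobsModel below is the real consumer

    model = ExampleModel("myproject")
    dhub = model.get_dhub("jobs")          # resolves the Datasource, builds the hub
    assert model.get_dhub("jobs") is dhub  # memoized in self.dhubs thereafter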
@@ -26,47 +26,56 @@ class JobsModel(TreeherderModelBase):
     CT_OBJECTSTORE = "objectstore"
     CONTENT_TYPES = [CT_JOBS, CT_OBJECTSTORE]

-    # @classmethod
-    # def create(cls, project, hosts=None, types=None):
-    #     """
-    #     Create all the datasource tables for this project.
-    #
-    #     ``hosts`` is an optional dictionary mapping contenttype names to the
-    #     database server host on which the database for that contenttype should
-    #     be created. Not all contenttypes need to be represented; any that
-    #     aren't will use the default (``TREEHERDER_DATABASE_HOST``).
-    #
-    #     ``types`` is an optional dictionary mapping contenttype names to the
-    #     type of database that should be created. For MySQL/MariaDB databases,
-    #     use "MySQL-Engine", where "Engine" could be "InnoDB", "Aria", etc. Not
-    #     all contenttypes need to be represented; any that aren't will use the
-    #     default (``MySQL-InnoDB``).
-    #
-    #
-    #     """
-    #     hosts = hosts or {}
-    #     types = types or {}
-    #
-    #     for ct in cls.CONTENT_TYPES:
-    #         Datasource.create(
-    #             project,
-    #             ct,
-    #             host=hosts.get(ct),
-    #             db_type=types.get(ct),
-    #         )
-    #
-    #     return cls(project=project)
+    @classmethod
+    def create(cls, project, hosts=None, types=None):
+        """
+        Create all the datasource tables for this project.
+
+        ``hosts`` is an optional dictionary mapping contenttype names to the
+        database server host on which the database for that contenttype should
+        be created. Not all contenttypes need to be represented; any that
+        aren't will use the default (``TREEHERDER_DATABASE_HOST``).
+
+        ``types`` is an optional dictionary mapping contenttype names to the
+        type of database that should be created. For MySQL/MariaDB databases,
+        use "MySQL-Engine", where "Engine" could be "InnoDB", "Aria", etc. Not
+        all contenttypes need to be represented; any that aren't will use the
+        default (``MySQL-InnoDB``).
+
+
+        """
+        hosts = hosts or {}
+        types = types or {}
+
+        for ct in [cls.CT_JOBS, cls.CT_OBJECTSTORE]:
+            dataset = Datasource.get_latest_dataset(project, ct)
+            source = Datasource(
+                project=project,
+                contenttype=ct,
+                dataset=dataset or 1,
+            )
+            source.save()
+
+        return cls(project=project)

+    def get_jobs_dhub(self):
+        """Get the dhub for jobs"""
+        return self.get_dhub(self.CT_JOBS)
+
+    def get_os_dhub(self):
+        """Get the dhub for the objectstore"""
+        return self.get_dhub(self.CT_OBJECTSTORE)

     def get_oauth_consumer_secret(self, key):
-        ds = self.sources[self.CT_OBJECTSTORE].datasource
+        ds = self.get_datasource(self.CT_OBJECTSTORE)
         secret = ds.get_oauth_consumer_secret(key)
         return secret

-    def _get_last_insert_id(self, source=None):
+    def _get_last_insert_id(self, contenttype=None):
         """Return last-inserted ID."""
-        if not source:
-            source = self.CT_JOBS
-        return self.sources[source].dhub.execute(
+        if not contenttype:
+            contenttype = self.CT_JOBS
+        return self.get_dhub(contenttype).execute(
             proc='generic.selects.get_last_insert_id',
             debug_show=self.DEBUG,
             return_type='iter',
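A usage sketch for the now-active classmethod (project name hypothetical). Note that in this commit the body only saves a bare Datasource row per contenttype; the ``hosts``/``types`` overrides described in the docstring are accepted but not yet consumed:

    from treeherder.model.derived.jobs import JobsModel

    jm = JobsModel.create("myproject")  # saves a Datasource row for CT_JOBS and
                                        # CT_OBJECTSTORE, then returns the model
    jobs_hub = jm.get_jobs_dhub()       # dhubs resolve lazily via the base class
    os_hub = jm.get_os_dhub()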
@@ -79,7 +88,7 @@ class JobsModel(TreeherderModelBase):
         error = "N" if error is None else "Y"
         error_msg = error or ""

-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc='objectstore.inserts.store_json',
             placeholders=[loaded_timestamp, json_data, error, error_msg],
             debug_show=self.DEBUG
@@ -98,7 +107,7 @@ class JobsModel(TreeherderModelBase):

         """
         proc = "objectstore.selects.get_unprocessed"
-        json_blobs = self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        json_blobs = self.get_os_dhub().execute(
             proc=proc,
             placeholders=[limit],
             debug_show=self.DEBUG,
@@ -274,7 +283,7 @@ class JobsModel(TreeherderModelBase):
         return job_id

     def _insert_data(self, statement, placeholders, executemany=False):
-        self.sources[self.CT_JOBS].dhub.execute(
+        self.get_jobs_dhub().execute(
             proc='jobs.inserts.' + statement,
             debug_show=self.DEBUG,
             placeholders=placeholders,
@@ -288,7 +297,7 @@ class JobsModel(TreeherderModelBase):

     def _get_last_insert_id(self, source=CT_JOBS):
         """Return last-inserted ID."""
-        return self.sources[source].dhub.execute(
+        return self.get_dhub(source).execute(
             proc='generic.selects.get_last_insert_id',
             debug_show=self.DEBUG,
             return_type='iter',
@@ -360,17 +369,17 @@ class JobsModel(TreeherderModelBase):
         # Note: this claims rows for processing. Failure to call load_job_data
         # on this data will result in some json blobs being stuck in limbo
         # until another worker comes along with the same connection ID.
-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc=proc_mark,
             placeholders=[limit],
             debug_show=self.DEBUG,
         )

         resetwarnings()

         # Return all JSON blobs claimed by this connection ID (could possibly
         # include orphaned rows from a previous run).
-        json_blobs = self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        json_blobs = self.get_os_dhub().execute(
             proc=proc_get,
             debug_show=self.DEBUG,
             return_type='tuple'
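The note above describes a claim-then-process protocol: rows claimed under a connection ID stay claimed until they are processed or a worker with the same ID returns. A sketch of the intended loop, using only methods touched in this commit (the loader call and row keys are assumptions based on the surrounding code):

    def process_claimed(jm, limit=100):
        # Hypothetical worker loop; claim_objects marks rows, then returns them.
        for row in jm.claim_objects(limit):
            try:
                job_id = jm.load_job_data(row["json_blob"])  # assumed signature
                jm.mark_object_complete(row["id"], job_id)
            except ValueError as e:
                jm.mark_object_error(row["id"], str(e))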
@@ -380,7 +389,7 @@ class JobsModel(TreeherderModelBase):

     def mark_object_complete(self, object_id, job_id):
         """ Call to database to mark the task completed """
-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc="objectstore.updates.mark_complete",
             placeholders=[job_id, object_id],
             debug_show=self.DEBUG
@@ -388,7 +397,7 @@ class JobsModel(TreeherderModelBase):

     def mark_object_error(self, object_id, error):
         """ Call to database to mark the task completed """
-        self.sources[self.CT_OBJECTSTORE].dhub.execute(
+        self.get_os_dhub().execute(
             proc="objectstore.updates.mark_error",
             placeholders=[error, object_id],
             debug_show=self.DEBUG
@@ -2,7 +2,6 @@ import os
 from django.conf import settings
 from datasource.bases.BaseHub import BaseHub
 from datasource.DataHub import DataHub
-from .base import TreeherderModelBase


 class RefDataManager(object):
@@ -9,6 +9,7 @@ from datasource.hubs.MySQL import MySQL
 from django.conf import settings
 from django.core.cache import cache
 from django.db import models
+from django.db.models import Max

 from treeherder import path
@@ -157,13 +158,26 @@ class MachineNote(models.Model):

 class DatasourceManager(models.Manager):
     def cached(self):
-        """Return all datasources, caching the results."""
+        """
+        Return all datasources, caching the results.
+
+        """
         sources = cache.get(SOURCES_CACHE_KEY)
         if not sources:
             sources = list(self.all())
             cache.set(SOURCES_CACHE_KEY, sources)
         return sources

+    def latest(self, project, contenttype):
+        """
+        @@@ TODO: this needs to use the cache, probably
+        """
+        ds = Datasource.get_latest_dataset(project, contenttype)
+        return self.get(
+            project=project,
+            contenttype=contenttype,
+            dataset=ds)
+

 class Datasource(models.Model):
     id = models.IntegerField(primary_key=True)
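For illustration, the new ``latest()`` combines the ``Max("dataset")`` aggregate (added in the next hunk) with the unique (project, contenttype, dataset) key; per the @@@ TODO it still hits the database rather than the cache. Spelled out with a hypothetical project name:

    n = Datasource.get_latest_dataset("myproject", "jobs")   # Max("dataset")
    ds = Datasource.objects.get(
        project="myproject", contenttype="jobs", dataset=n)
    # ...which is what Datasource.objects.latest("myproject", "jobs") returns.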
@@ -192,6 +206,14 @@ class Datasource(models.Model):
         cache.delete(SOURCES_CACHE_KEY)
         cls.objects.cached()

+    @classmethod
+    def get_latest_dataset(cls, project, contenttype):
+        """get the latest dataset"""
+        return cls.objects.filter(
+            project=project,
+            contenttype=contenttype,
+        ).aggregate(Max("dataset"))["dataset__max"]
+
     @property
     def key(self):
         """Unique key for a data source is the project, contenttype, dataset."""
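Worth noting: ``aggregate(Max(...))`` returns ``None`` when no rows match, which is why ``JobsModel.create`` above guards with ``dataset or 1``. A sketch:

    # No datasources exist yet for this project/contenttype:
    Datasource.get_latest_dataset("new-project", "jobs")  # -> None

    # Hence, in JobsModel.create:
    dataset = Datasource.get_latest_dataset(project, ct)
    source = Datasource(project=project, contenttype=ct, dataset=dataset or 1)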
@@ -202,6 +224,31 @@ class Datasource(models.Model):
         """Unicode representation is the project's unique key."""
         return unicode(self.key)

+    def create_next_dataset(self, schema_file=None):
+        """
+        Create and return the next dataset for this project/contenttype.
+
+        The database for the new dataset will be located on the same host.
+
+        """
+        dataset = Datasource.objects.filter(
+            project=self.project,
+            contenttype=self.contenttype
+        ).order_by("-dataset")[0].dataset + 1
+
+        # @@@ should we store the schema file name used for the previous
+        # dataset in the db and use the same one again automatically? or should
+        # we actually copy the schema of an existing dataset rather than using
+        # a schema file at all?
+        return Datasource.objects.create(
+            project=self.project,
+            contenttype=self.contenttype,
+            dataset=dataset,
+            host=self.datasource.host,
+            db_type=self.datasource.type,
+            schema_file=schema_file,
+        )
+
     def save(self, *args, **kwargs):
         inserting = not self.pk
         # in case you want to add a new datasource and provide
@@ -2,7 +2,7 @@
     "inserts":{
         "store_json":{

-            "sql":"INSERT INTO `objectstore` (`date_timestamp`,
+            "sql":"INSERT INTO `objectstore` (`loaded_timestamp`,
                                               `json_blob`,
                                               `error`,
                                               `error_msg`)