Bug 1116511: Add script to sync data from Tableau.

This commit is contained in:
Paul McLanahan 2015-02-06 16:34:50 -05:00
Родитель e6d44decf2
Коммит b0eced3174
4 изменённых файлов: 135 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
from south.utils import datetime_utils as datetime
from south.db import db
from south.v2 import SchemaMigration
from django.db import models
class Migration(SchemaMigration):
    """South schema migration introducing the ``ContributorActivity`` table.

    ``forwards`` creates ``mozorg_contributoractivity`` together with a unique
    constraint over (date, source_name, team_name); ``backwards`` removes the
    constraint and then the table.
    """

    def forwards(self, orm):
        """Create the table, announce the new model, then add the constraint."""
        table = u'mozorg_contributoractivity'
        char_field = self.gf('django.db.models.fields.CharField')
        int_field = self.gf('django.db.models.fields.IntegerField')

        # Adding model 'ContributorActivity'
        columns = (
            (u'id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('date', self.gf('django.db.models.fields.DateField')()),
            ('source_name', char_field(max_length=100)),
            ('team_name', char_field(max_length=100)),
            ('total', int_field()),
            ('new', int_field()),
        )
        db.create_table(table, columns)
        db.send_create_signal(u'mozorg', ['ContributorActivity'])

        # Adding unique constraint on 'ContributorActivity',
        # fields ['date', 'source_name', 'team_name']
        db.create_unique(table, ['date', 'source_name', 'team_name'])

    def backwards(self, orm):
        """Undo ``forwards``: drop the unique constraint first, then the table."""
        table = u'mozorg_contributoractivity'

        # Removing unique constraint on 'ContributorActivity',
        # fields ['date', 'source_name', 'team_name']
        db.delete_unique(table, ['date', 'source_name', 'team_name'])

        # Deleting model 'ContributorActivity'
        db.delete_table(table)

    # Frozen model definitions for the app as of this migration.
    models = {
        u'mozorg.contributoractivity': {
            'Meta': {
                'ordering': "['-date']",
                'unique_together': "(('date', 'source_name', 'team_name'),)",
                'object_name': 'ContributorActivity',
            },
            'date': ('django.db.models.fields.DateField', [], {}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'new': ('django.db.models.fields.IntegerField', [], {}),
            'source_name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'team_name': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'total': ('django.db.models.fields.IntegerField', [], {}),
        },
        u'mozorg.twittercache': {
            'Meta': {'object_name': 'TwitterCache'},
            'account': ('django.db.models.fields.CharField', [], {
                'unique': 'True',
                'max_length': '100',
                'db_index': 'True',
            }),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'tweets': ('picklefield.fields.PickledObjectField', [], {'default': '[]'}),
            'updated': ('django.db.models.fields.DateTimeField', [], {
                'default': 'datetime.datetime.now',
                'blank': 'True',
            }),
        },
    }

    complete_apps = ['mozorg']

Просмотреть файл

@ -31,3 +31,16 @@ class TwitterCache(models.Model):
def __unicode__(self):
    """Return a human-readable label naming the Twitter account of this cache row."""
    return u'Tweets from @' + self.account
class ContributorActivity(models.Model):
    """Contributor counts for one team and one data source on one day.

    Each (date, source_name, team_name) combination may appear only once.
    Rows are ordered newest first, and ``latest()`` with no arguments
    resolves by ``date``.
    """
    # Day the counts refer to.
    date = models.DateField()
    # Name of the data source the counts came from.
    source_name = models.CharField(max_length=100)
    # Name of the team the counts belong to.
    team_name = models.CharField(max_length=100)
    # Count of contributors for that day / source / team.
    total = models.IntegerField()
    # How many of those were flagged as new.
    new = models.IntegerField()

    class Meta:
        unique_together = ('date', 'source_name', 'team_name')
        get_latest_by = 'date'
        ordering = ['-date']

    def __unicode__(self):
        """Return a readable label so rows are identifiable in the admin and shell."""
        return u'{0} / {1} on {2}: {3} total, {4} new'.format(
            self.team_name, self.source_name, self.date, self.total, self.new)

Просмотреть файл

@ -2222,3 +2222,5 @@ FIREFOX_OS_FEEDS = (
('pt-BR', 'https://blog.mozilla.org/press-br/category/firefox-os/feed/'), ('pt-BR', 'https://blog.mozilla.org/press-br/category/firefox-os/feed/'),
) )
# First element (the locale code) of every entry in FIREFOX_OS_FEEDS.
FIREFOX_OS_FEED_LOCALES = [feed[0] for feed in FIREFOX_OS_FEEDS]
# URL of the Tableau MySQL database that contributor activity is synced from,
# in the form mysql://user:password@host/dbname. The sync script prints an
# error and exits while this is left unset (None).
TABLEAU_DB_URL = None

Просмотреть файл

@ -0,0 +1,67 @@
import urlparse
import sys
from django.conf import settings
import MySQLdb
from bedrock.mozorg.models import ContributorActivity
# Register "mysql" as a scheme whose URLs carry a network location, so that
# urlparse splits out the user, password and host of mysql:// URLs.
urlparse.uses_netloc.extend(['mysql'])

# Per-day, per-team, per-source contributor counts. ``{where}`` is replaced
# with an optional WHERE clause before the statement is executed.
QUERY = (
    'SELECT c_date, team_name, source_name, '
    'count(*) AS total, IFNULL(SUM(is_new), 0) AS new '
    'FROM contributor_active {where} '
    'GROUP BY c_date, team_name, source_name'
)
def _connection_params(db_url):
    """Turn a ``mysql://user:pass@host[:port]/dbname`` URL into connect() kwargs.

    Returns a dict suitable for ``MySQLdb.connect(**kwargs)``, or ``None``
    when the URL does not name a database.
    """
    url = urlparse.urlparse(db_url)
    # The path component is "/<dbname>"; strip the leading slash.
    db_name = url.path[1:]
    if not db_name:
        return None

    candidates = {
        'db': db_name,
        'user': url.username,
        'passwd': url.password,
        'host': url.hostname,
        'port': url.port,
    }
    # Omit anything the URL did not supply so the driver falls back to its
    # own defaults instead of being handed an explicit None.
    return dict((key, value) for key, value in candidates.items()
                if value is not None)


def run():
    """Get contributor activity data from Tableau and insert it into bedrock DB.

    Only rows dated after the newest ``ContributorActivity`` already stored
    are fetched; when the table is empty everything is fetched. The remote
    connection is always closed before returning.

    Raises:
        SystemExit: with status 1 when ``settings.TABLEAU_DB_URL`` is unset
            or names no database, or when MySQLdb reports an error.
    """
    if not settings.TABLEAU_DB_URL:
        print('Must set TABLEAU_DB_URL.')
        sys.exit(1)

    con_data = _connection_params(settings.TABLEAU_DB_URL)
    if con_data is None:
        # bad db url
        print('TABLEAU_DB_URL not parseable.')
        sys.exit(1)

    try:
        latest_date = ContributorActivity.objects.only('date').latest().date
    except ContributorActivity.DoesNotExist:
        # Nothing stored yet: import the full history.
        where_clause = ''
        params = None
    else:
        # Let the driver quote the date rather than splicing it into the SQL.
        where_clause = 'WHERE c_date > %s'
        params = (latest_date.isoformat(),)

    con = None
    try:
        con = MySQLdb.connect(**con_data)
        cur = con.cursor()
        cur.execute(QUERY.format(where=where_clause), params)
        rows = cur.fetchall()

        # Column order follows the SELECT list in QUERY.
        activities = [
            ContributorActivity(
                date=c_date,
                team_name=team_name,
                source_name=source_name,
                total=total,
                new=new,
            )
            for c_date, team_name, source_name, total, new in rows
        ]
        ContributorActivity.objects.bulk_create(activities)
        print('Created {0} contributor activity rows'.format(len(rows)))
    except MySQLdb.Error as e:
        # Not every driver error carries (code, message); avoid masking the
        # real failure with an IndexError while reporting it.
        if len(e.args) >= 2:
            message = 'Error %s: %s' % (e.args[0], e.args[1])
        else:
            message = 'Error: %s' % (e,)
        sys.stderr.write(message + '\n')
        sys.exit(1)
    finally:
        if con:
            con.close()