Add DB migration (will help with model migration)
Update image to support DB migration. Add migration to add model_keys to help eliminate duplicates through indexing.
@ -35,6 +35,8 @@ VOLUME /model_data
# Finish the setup
ADD docker_scripts/nginx.conf /etc/nginx/nginx.conf
ADD docker_scripts/ docker_scripts/wsgi.ini ./
# Add migrations
ADD migrations ./migrations
# Add the rest of our library here
ADD seattle_flu_incidence_mapper ./seattle_flu_incidence_mapper
@ -1,5 +1,5 @@
module = seattle_flu_incidence_mapper.server:app
module =
master = true
# Set the processes to total CPU count
processes = %k
@ -0,0 +1 @@
@ -0,0 +1,45 @@
# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# Logging configuration
keys = root,sqlalchemy,alembic
keys = console
keys = generic
level = WARN
handlers = console
qualname =
level = WARN
handlers =
qualname = sqlalchemy.engine
level = INFO
handlers =
qualname = alembic
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
@ -0,0 +1,95 @@
from __future__ import with_statement
import logging
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
logger = logging.getLogger('alembic.env')
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
from flask import current_app
target_metadata = current_app.extensions['migrate'].db.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline():
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
url = config.get_main_option("sqlalchemy.url")
url=url, target_metadata=target_metadata, literal_binds=True
with context.begin_transaction():
def run_migrations_online():
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
# this callback is used to prevent an auto-migration from being generated
# when there are no changes to the schema
# reference:
def process_revision_directives(context, revision, directives):
    """Discard the generated revision when autogenerate finds no changes.

    Args:
        context: Passed through by the caller; not used here.
        revision: Passed through by the caller; not used here.
        directives: Mutable list of revision directives. It is emptied in
            place when its first entry has no upgrade operations.
    """
    # Only act when the command was invoked with the autogenerate option.
    if not getattr(config.cmd_opts, 'autogenerate', False):
        return
    script = directives[0]
    if script.upgrade_ops.is_empty():
        # Slice-assign so the caller's list object itself is cleared.
        directives[:] = []
        logger.info('No changes in schema detected.')
connectable = engine_from_config(
with connectable.connect() as connection:
with context.begin_transaction():
if context.is_offline_mode():
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade():
${upgrades if upgrades else "pass"}
def downgrade():
${downgrades if downgrades else "pass"}
"""empty message
Revision ID: 2c92fa01c7ef
Revises: 7d4c7414c89e
Create Date: 2019-05-16 12:39:40.321616
import hashlib
from datetime import datetime
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
from sqlalchemy import Column, String, DateTime
from sqlalchemy.orm import sessionmaker
from seattle_flu_incidence_mapper.orm_config import get_declarative_base
connection = op.get_bind()
Session = sessionmaker()
revision = '2c92fa01c7ef'
down_revision = '7d4c7414c89e'
branch_labels = None
depends_on = None
old_model_name = 'pathogen_model'
new_model_name = 'generic_model'
class PathogenModel(get_declarative_base()):
    """Mapping of the ``pathogen_model`` table as used by this migration.

    Includes the ``model_key`` column alongside the original columns.
    """

    __tablename__ = 'pathogen_model'

    # Primary key is the pair (id, created).
    id = Column(String, primary_key=True)
    name = Column(String)
    query_str = Column(String)
    model_type = Column(String)
    rds_key = Column(String)
    model_key = Column(String)
    # Defaults to the current UTC time when no value is supplied.
    created = Column(
        DateTime,
        primary_key=True,
        default=datetime.utcnow,
    )
def upgrade():
# SQLite doesn't support dropping a constraint, so instead we
# create a new table with the new primary key
sa.Column('id', sa.VARCHAR(), nullable=False, primary_key=True),
sa.Column('name', sa.VARCHAR(), nullable=True),
sa.Column('query_str', sa.VARCHAR(), nullable=True),
sa.Column('model_type', sa.VARCHAR(), nullable=True),
sa.Column('rds_key', sa.VARCHAR(), nullable=True),
sa.Column('model_key', sa.VARCHAR(), nullable=False, primary_key=True),
sa.Column('created', sa.DATETIME(), nullable=False),
sa.PrimaryKeyConstraint('id', 'model_key')
# add our column to our old table and populate the column
#with op.batch_alter_table(old_model_name) as batch_op:
# batch_op.add_column(Column('model_key', String()))
old_columns = ['id', 'name', 'query_str', 'model_type', 'rds_key', 'model_key', 'created']
col_str = ",".join(old_columns)
op.execute(f'INSERT INTO {new_model_name} ({col_str}) SELECT {col_str} FROM {old_model_name};')
def downgrade():
op.rename_table(new_model_name, old_model_name)
with op.batch_alter_table(old_model_name) as batch_op:
def populate_model_keys():
session = Session(bind=connection)
from seattle_flu_incidence_mapper.model_store import get_model_file
model_id_key_hash = {}
# let's calculate our model keys and rds keys
for model in session.query(PathogenModel).order_by(PathogenModel.created.desc()).all():
modelfile = get_model_file(
with open(modelfile, 'r') as mf:
model_key = hashlib.md5('utf-8')).hexdigest()
model.model_key = model_key
combo_id = + model_key
if combo_id not in model_id_key_hash:
model_id_key_hash[ + model_key] = True
# now find models that have duplicate ids/model keys
# only keep the latest
"""empty message
Revision ID: 7d4c7414c89e
Create Date: 2019-05-16 12:35:09.965531
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
from sqlalchemy.engine import reflection
revision = '7d4c7414c89e'
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
engine = op.get_bind().engine
table_names = sa.inspect(engine).get_table_names()
if 'pathogen_model' not in table_names:
sa.Column('id', sa.VARCHAR(), nullable=False, primary_key=True),
sa.Column('name', sa.VARCHAR(), nullable=True),
sa.Column('query_str', sa.VARCHAR(), nullable=True),
sa.Column('model_type', sa.VARCHAR(), nullable=True),
sa.Column('rds_key', sa.VARCHAR(), nullable=True),
sa.Column('created', sa.DATETIME(), nullable=False, primary_key=True),
sa.PrimaryKeyConstraint('id', 'created')
def downgrade():
# This is the first migration, so there is nothing earlier to revert to
# Get the application instance
from seattle_flu_incidence_mapper.config import app
if __name__ == "__main__":
@ -2,6 +2,7 @@ import json
import os
import connexion
from flask import Response
from flask_migrate import Migrate
from sqlalchemy.orm.exc import NoResultFound
from seattle_flu_incidence_mapper.orm_config import setup_db
@ -9,26 +10,6 @@ from seattle_flu_incidence_mapper.utils import set_marshmallow, ModelExecutionEx
basedir = os.path.abspath(os.path.dirname(__file__))
true_vals = ['1', 'y', 'yes', 't', 'true']
# Create the Connexion application instance
connex_app = connexion.App("seattle_flu_incidence_mapper.config", specification_dir=os.path.join(basedir, 'swagger'))
# Get the underlying Flask app instance
app =
app.config['WORKER_IMAGE'] = os.environ.get('WORKER_IMAGE', '')
app.config['MODEL_STORE'] = os.environ.get('MODEL_STORE', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['MODEL_HOST_PATH'] = os.environ.get('MODEL_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['WORKER_JOB_HOST_PATH'] = os.environ.get('WORKER_JOB_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
app.config['MODEL_JOB_PATH'] = os.environ.get('MODEL_JOB_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
db = setup_db(basedir, app)
# DO NOT MOVE this line. The order matters here
# we need to init our db before loading our models
from seattle_flu_incidence_mapper.models import *
if os.environ.get('DEBUG', '0').lower() in true_vals or os.environ.get('CREATE_DB', '0').lower() in true_vals:
def sqlalchemy_error_handler(exception):
@ -41,5 +22,32 @@ def model_exec_error_handler(exception):
status=500, mimetype="application/json")
# Create the Connexion application instance
connex_app = connexion.App("seattle_flu_incidence_mapper.config", specification_dir=os.path.join(basedir, 'swagger'))
# Get the underlying Flask app instance
app =
app.config['WORKER_IMAGE'] = os.environ.get('WORKER_IMAGE', '')
app.config['MODEL_STORE'] = os.environ.get('MODEL_STORE', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['MODEL_HOST_PATH'] = os.environ.get('MODEL_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['WORKER_JOB_HOST_PATH'] = os.environ.get('WORKER_JOB_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
app.config['MODEL_JOB_PATH'] = os.environ.get('MODEL_JOB_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
db = setup_db(basedir, app)
migrate = Migrate(app, db)
# DO NOT MOVE this line. The order matters here
# we need to init our db before loading our models
from seattle_flu_incidence_mapper.models import *
if os.environ.get('DEBUG', '0').lower() in true_vals or os.environ.get('CREATE_DB', '0').lower() in true_vals:
connex_app.add_error_handler(ModelExecutionException, model_exec_error_handler)
connex_app.add_error_handler(NoResultFound, sqlalchemy_error_handler)
connex_app.add_error_handler(NoResultFound, sqlalchemy_error_handler)
# Read the swagger.yml file to configure the endpoints
from datetime import datetime
from seattle_flu_incidence_mapper.orm_config import db
from sqlalchemy import String, Column, DateTime
from seattle_flu_incidence_mapper.orm_config import get_session, get_declarative_base
from seattle_flu_incidence_mapper.utils import ma
base = get_declarative_base()
class GenericModel(db.Model):
    """``db.Model`` mapping of the ``pathogen_model`` table.

    Rows are keyed by the pair (id, created).
    """

    __tablename__ = 'pathogen_model'

    id = db.Column(db.String, primary_key=True)
    name = db.Column(db.String)
    query_str = db.Column(db.String)
    model_type = db.Column(db.String)
    rds_key = db.Column(db.String)
    # Defaults to the current UTC time when no value is supplied.
    created = db.Column(
        db.DateTime,
        primary_key=True,
        default=datetime.utcnow,
    )
class GenericModel(base):
    """Declarative mapping of the ``generic_model`` table.

    Rows are keyed by the pair (id, model_key).
    """

    __tablename__ = 'generic_model'

    id = Column(String, primary_key=True)
    name = Column(String)
    query_str = Column(String)
    model_type = Column(String)
    model_key = Column(String, primary_key=True)
    rds_key = Column(String)
    # Not part of the key; defaults to the current UTC time.
    created = Column(
        DateTime,
        default=datetime.utcnow,
    )
class GenericModelSchema(ma.ModelSchema):
    """Schema for ``GenericModel`` built on ``ma.ModelSchema``."""

    class Meta:
        # Model class this schema is generated from.
        model = GenericModel
        # The ``get_session`` accessor itself is assigned, not its result.
        # NOTE(review): confirm the schema base accepts a callable here.
        sqla_session = get_session
import functools
import os
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.ext.declarative import declarative_base
db = None
session = None
base = None
def get_declarative_base():
    """Return the shared declarative base, creating it on first call.

    The base is cached in the module-level ``base`` variable so every
    caller receives the same object.
    """
    global base
    if base is not None:
        return base
    base = declarative_base()
    return base
def get_db():
    """Return the module-level ``db`` object (``None`` if it was never set)."""
    current_db = db
    return current_db
def get_session():
    """Return the module-level ``session`` object (``None`` if never set)."""
    current_session = session
    return current_session
def setup_db(basedir, app):
    """Configure SQLAlchemy on ``app`` and publish the module-level handles.

    Args:
        basedir: Directory used to build the default SQLite path
            (``<basedir>/models.db``) when the ``SQLALCHEMY_DATABASE_URI``
            environment variable is not set.
        app: Application object whose ``config`` mapping is updated and
            which is handed to ``SQLAlchemy``.

    Returns:
        The ``SQLAlchemy`` instance, which is also stored in the module
        global ``db``. Its ``session`` attribute is stored in the module
        global ``session``.
    """
    global db, session

    # Configure the SQLAlchemy part of the app instance
    app.config['SQLALCHEMY_ECHO'] = True
    app.config['SQLALCHEMY_DATABASE_URI'] = os.getenv(
        'SQLALCHEMY_DATABASE_URI',
        'sqlite:////' + os.path.join(basedir, 'models.db'),
    )

    # Create the SQLAlchemy db instance
    db = SQLAlchemy(app)
    # Publish the session before returning; an earlier return here would
    # leave ``session`` as None for every get_session() caller.
    session = db.session
    return db
# local modules
from connexion.exceptions import OAuthProblem
from flask import request
import seattle_flu_incidence_mapper.config as config
# Get the application instance
app = config.connex_app
# Read the swagger.yml file to configure the endpoints
if __name__ == "__main__":
@ -5,7 +5,7 @@ from flask.testing import FlaskClient
from werkzeug.utils import cached_property
from werkzeug.wrappers import BaseResponse
from seattle_flu_incidence_mapper.server import app
from import app
BASE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'fixtures'))
