Add db migration (will help with model migration)

Update image to support db migration
Add migration to add model_keys to help eliminate duplicates through indexing
This commit is contained in:
Clinton Collins 2019-05-16 15:21:06 -07:00
Родитель d4529a42a5
Коммит ad24a414b7
15 изменённых файлов: 381 добавлений и 51 удалений

Просмотреть файл

@ -35,6 +35,8 @@ VOLUME /model_data
# Finish the setup
ADD docker_scripts/nginx.conf /etc/nginx/nginx.conf
ADD docker_scripts/entrypoint.sh docker_scripts/wsgi.ini ./
# Add migrations
ADD migrations ./migrations
# Add the rest of our library here
ADD seattle_flu_incidence_mapper ./seattle_flu_incidence_mapper

Просмотреть файл

@ -1,5 +1,5 @@
[uwsgi]
module = seattle_flu_incidence_mapper.server:app
module = seattle_flu_incidence_mapper.app:app
master = true
# Set the processes to total CPU count
processes = %k

Просмотреть файл

@ -0,0 +1 @@
Generic single-database configuration.

Просмотреть файл

@ -0,0 +1,45 @@
# A generic, single database configuration.
[alembic]
# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

Просмотреть файл

@ -0,0 +1,95 @@
from __future__ import with_statement
import logging
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
logger = logging.getLogger('alembic.env')
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
from flask import current_app
config.set_main_option('sqlalchemy.url',
current_app.config.get('SQLALCHEMY_DATABASE_URI'))
target_metadata = current_app.extensions['migrate'].db.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    Configures the Alembic context with only a database URL — no Engine
    is built, so no DBAPI needs to be importable. Calls to
    context.execute() emit the generated SQL to the script output
    instead of executing it.
    """
    db_url = config.get_main_option("sqlalchemy.url")
    context.configure(url=db_url, target_metadata=target_metadata, literal_binds=True)
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online():
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    # this callback is used to prevent an auto-migration from being generated
    # when there are no changes to the schema
    # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
    def process_revision_directives(context, revision, directives):
        # Only act on `alembic revision --autogenerate` invocations.
        if getattr(config.cmd_opts, 'autogenerate', False):
            script = directives[0]
            if script.upgrade_ops.is_empty():
                # Discard the pending revision entirely rather than
                # writing an empty migration file.
                directives[:] = []
                logger.info('No changes in schema detected.')

    # Build the Engine from the [alembic]/sqlalchemy.* config section;
    # NullPool ensures no connections linger after the migration run.
    connectable = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix='sqlalchemy.',
        poolclass=pool.NullPool,
    )
    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            process_revision_directives=process_revision_directives,
            # Forward whatever extra options Flask-Migrate registered at
            # Migrate() setup time.
            **current_app.extensions['migrate'].configure_args
        )
        with context.begin_transaction():
            context.run_migrations()
# Entry point: Alembic executes this module and we pick the mode it
# requested on the command line.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()

Просмотреть файл

@ -0,0 +1,24 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade():
${upgrades if upgrades else "pass"}
def downgrade():
${downgrades if downgrades else "pass"}

Просмотреть файл

@ -0,0 +1,101 @@
"""empty message
Revision ID: 2c92fa01c7ef
Revises: 7d4c7414c89e
Create Date: 2019-05-16 12:39:40.321616
"""
import hashlib
from datetime import datetime
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
from sqlalchemy import Column, String, DateTime
from sqlalchemy.orm import sessionmaker
from seattle_flu_incidence_mapper.orm_config import get_declarative_base
connection = op.get_bind()
Session = sessionmaker()
revision = '2c92fa01c7ef'
down_revision = '7d4c7414c89e'
branch_labels = None
depends_on = None
old_model_name = 'pathogen_model'
new_model_name = 'generic_model'
class PathogenModel(get_declarative_base()):
    """Minimal ORM mapping of the legacy 'pathogen_model' table.

    Declared locally so this migration does not depend on the current
    application models, which may drift ahead of this revision.
    """
    __tablename__ = 'pathogen_model'
    id = Column(String, primary_key=True)
    name = Column(String)
    query_str = Column(String)
    model_type = Column(String)
    rds_key = Column(String)
    # Content hash of the model file; filled in by populate_model_keys().
    model_key = Column(String)
    created = Column(DateTime, primary_key=True, default=datetime.utcnow)
def upgrade():
    """Migrate 'pathogen_model' to 'generic_model' with a (id, model_key) PK.

    SQLite cannot drop constraints in place, so instead of altering the
    existing table we create the replacement table, back-fill model_key on
    the old rows, copy them across, and drop the old table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # create our new table
    op.create_table(new_model_name,
                    sa.Column('id', sa.VARCHAR(), nullable=False, primary_key=True),
                    sa.Column('name', sa.VARCHAR(), nullable=True),
                    sa.Column('query_str', sa.VARCHAR(), nullable=True),
                    sa.Column('model_type', sa.VARCHAR(), nullable=True),
                    sa.Column('rds_key', sa.VARCHAR(), nullable=True),
                    sa.Column('model_key', sa.VARCHAR(), nullable=False, primary_key=True),
                    sa.Column('created', sa.DATETIME(), nullable=False),
                    sa.PrimaryKeyConstraint('id', 'model_key')
                    )
    # Add model_key to the OLD table first: populate_model_keys() writes
    # model.model_key through the ORM and the INSERT...SELECT below reads
    # the column from pathogen_model, so it must exist before either runs.
    with op.batch_alter_table(old_model_name) as batch_op:
        batch_op.add_column(Column('model_key', String()))
    populate_model_keys()
    old_columns = ['id', 'name', 'query_str', 'model_type', 'rds_key', 'model_key', 'created']
    col_str = ",".join(old_columns)
    # Bulk-copy the surviving (de-duplicated) rows into the new table.
    op.execute(f'INSERT INTO {new_model_name} ({col_str}) SELECT {col_str} FROM {old_model_name};')
    op.drop_table(old_model_name)
    # ### end Alembic commands ###
def downgrade():
    """Reverse of upgrade(): restore the old table name and schema.

    Rename generic_model back to pathogen_model, then drop only the
    column this revision introduced.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.rename_table(new_model_name, old_model_name)
    with op.batch_alter_table(old_model_name) as batch_op:
        # NOTE: 'rds_key' already exists in the previous revision
        # (7d4c7414c89e), so it must NOT be dropped here or the downgraded
        # schema would diverge from that revision.
        batch_op.drop_column('model_key')
    # ### end Alembic commands ###
def populate_model_keys():
    """Back-fill model_key (a content hash of each model file) and delete
    duplicate rows.

    Rows are walked newest-first, so for each (id, model_key) combination
    the most recent row is kept and older duplicates are deleted.
    """
    from seattle_flu_incidence_mapper.model_store import get_model_file

    # NOTE(review): bind via op.get_bind() here rather than the module-level
    # `connection` global — that global is evaluated at import time, when the
    # Alembic op proxy may not yet be configured.
    session = Session(bind=op.get_bind())
    seen_keys = set()
    # let's calculate our model keys and rds keys
    for model in session.query(PathogenModel).order_by(PathogenModel.created.desc()).all():
        modelfile = get_model_file(model.id)
        # Hash the raw bytes: model files (e.g. RDS blobs) are not
        # guaranteed to be valid UTF-8 text, so text-mode reading could fail.
        with open(modelfile, 'rb') as mf:
            model_key = hashlib.md5(mf.read()).hexdigest()
        model.model_key = model_key
        combo_id = model.id + model_key
        if combo_id in seen_keys:
            # Older duplicate of a row we already kept — remove it.
            session.delete(model)
        else:
            seen_keys.add(combo_id)
            session.add(model)
    session.flush()
    session.commit()

Просмотреть файл

@ -0,0 +1,40 @@
"""empty message
Revision ID: 7d4c7414c89e
Revises:
Create Date: 2019-05-16 12:35:09.965531
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
from sqlalchemy.engine import reflection
revision = '7d4c7414c89e'
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
    """Create the initial 'pathogen_model' table unless it already exists.

    The existence check keeps this migration safe to run against databases
    that were previously bootstrapped outside of Alembic.
    """
    bind = op.get_bind()
    existing_tables = sa.inspect(bind.engine).get_table_names()
    if 'pathogen_model' in existing_tables:
        return
    op.create_table(
        'pathogen_model',
        sa.Column('id', sa.VARCHAR(), nullable=False, primary_key=True),
        sa.Column('name', sa.VARCHAR(), nullable=True),
        sa.Column('query_str', sa.VARCHAR(), nullable=True),
        sa.Column('model_type', sa.VARCHAR(), nullable=True),
        sa.Column('rds_key', sa.VARCHAR(), nullable=True),
        sa.Column('created', sa.DATETIME(), nullable=False, primary_key=True),
        sa.PrimaryKeyConstraint('id', 'created'),
    )
def downgrade():
    """Revert the initial revision.

    This is the first migration, so there is no earlier schema to
    restore — intentionally a no-op.
    """
    # This is our first so nothing to do after this
    pass

Просмотреть файл

@ -0,0 +1,7 @@
# Get the application instance
from seattle_flu_incidence_mapper.config import app
if __name__ == "__main__":
    # Dispatch to the app's CLI group (e.g. `python app.py db upgrade`).
    # NOTE(review): app.cli is Flask's click command group — confirm direct
    # invocation (rather than app.run) is the intended entry point here.
    app.cli()

Просмотреть файл

@ -2,6 +2,7 @@ import json
import os
import connexion
from flask import Response
from flask_migrate import Migrate
from sqlalchemy.orm.exc import NoResultFound
from seattle_flu_incidence_mapper.orm_config import setup_db
@ -9,26 +10,6 @@ from seattle_flu_incidence_mapper.utils import set_marshmallow, ModelExecutionEx
basedir = os.path.abspath(os.path.dirname(__file__))
true_vals = ['1', 'y', 'yes', 't', 'true']
# Create the Connexion application instance
connex_app = connexion.App("seattle_flu_incidence_mapper.config", specification_dir=os.path.join(basedir, 'swagger'))
# Get the underlying Flask app instance
app = connex_app.app
app.config['WORKER_IMAGE'] = os.environ.get('WORKER_IMAGE', 'idm-docker-production.packages.idmod.org/sfim-worker:latest')
app.config['MODEL_STORE'] = os.environ.get('MODEL_STORE', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['MODEL_HOST_PATH'] = os.environ.get('MODEL_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['WORKER_JOB_HOST_PATH'] = os.environ.get('WORKER_JOB_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
app.config['MODEL_JOB_PATH'] = os.environ.get('MODEL_JOB_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
db = setup_db(basedir, app)
set_marshmallow(app)
# DO NOT MOVE this line. The order matters here
# we need to init our db before loading our models
from seattle_flu_incidence_mapper.models import *
if os.environ.get('DEBUG', '0').lower() in true_vals or os.environ.get('CREATE_DB', '0').lower() in true_vals:
db.create_all()
def sqlalchemy_error_handler(exception):
@ -41,5 +22,32 @@ def model_exec_error_handler(exception):
status=500, mimetype="application/json")
# Create the Connexion application instance
connex_app = connexion.App("seattle_flu_incidence_mapper.config", specification_dir=os.path.join(basedir, 'swagger'))
# Get the underlying Flask app instance
app = connex_app.app
app.config['WORKER_IMAGE'] = os.environ.get('WORKER_IMAGE', 'idm-docker-production.packages.idmod.org/sfim-worker:latest')
app.config['MODEL_STORE'] = os.environ.get('MODEL_STORE', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['MODEL_HOST_PATH'] = os.environ.get('MODEL_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['WORKER_JOB_HOST_PATH'] = os.environ.get('WORKER_JOB_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
app.config['MODEL_JOB_PATH'] = os.environ.get('MODEL_JOB_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
db = setup_db(basedir, app)
migrate = Migrate(app, db)
set_marshmallow(app)
# DO NOT MOVE this line. The order matters here
# we need to init our db before loading our models
from seattle_flu_incidence_mapper.models import *
if os.environ.get('DEBUG', '0').lower() in true_vals or os.environ.get('CREATE_DB', '0').lower() in true_vals:
db.create_all()
connex_app.add_error_handler(ModelExecutionException, model_exec_error_handler)
connex_app.add_error_handler(NoResultFound, sqlalchemy_error_handler)
connex_app.add_error_handler(NoResultFound, sqlalchemy_error_handler)
# Read the swagger.yml file to configure the endpoints
connex_app.add_api("swagger.yml")

Просмотреть файл

@ -1,20 +1,23 @@
from datetime import datetime
from seattle_flu_incidence_mapper.orm_config import db
from sqlalchemy import String, Column, DateTime
from seattle_flu_incidence_mapper.orm_config import get_session, get_declarative_base
from seattle_flu_incidence_mapper.utils import ma
base = get_declarative_base()
class GenericModel(db.Model):
__tablename__ = 'pathogen_model'
id = db.Column(db.String, primary_key=True)
name = db.Column(db.String)
query_str = db.Column(db.String)
model_type = db.Column(db.String)
rds_key = db.Column(db.String)
created = db.Column(db.DateTime, primary_key=True, default=datetime.utcnow)
class GenericModel(base):
    """ORM model for a stored model artifact.

    Replaces the old 'pathogen_model' table; the composite primary key
    (id, model_key) lets multiple versions of the same model id coexist
    while preventing exact-duplicate rows via the key index.
    """
    __tablename__ = 'generic_model'
    id = Column(String, primary_key=True)
    name = Column(String)
    query_str = Column(String)
    model_type = Column(String)
    # Content-hash key; part of the composite primary key.
    model_key = Column(String, primary_key=True)
    rds_key = Column(String)
    created = Column(DateTime, default=datetime.utcnow)
class GenericModelSchema(ma.ModelSchema):
    """Marshmallow (de)serialization schema generated from GenericModel."""
    class Meta:
        model = GenericModel
        # NOTE(review): get_session is assigned as a callable, not a live
        # session object — confirm marshmallow-sqlalchemy accepts a session
        # factory here; otherwise this should be get_session().
        sqla_session = get_session

Просмотреть файл

@ -1,22 +1,39 @@
import functools
import os
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.ext.declarative import declarative_base
# Module-level singletons, initialized lazily:
#   db      — the flask_sqlalchemy.SQLAlchemy instance, set by setup_db()
#   session — db.session, set by setup_db()
#   base    — declarative base, created on first get_declarative_base() call
db = None
session = None
base = None
def get_declarative_base():
    """Return the process-wide declarative base, creating it on first use."""
    global base
    base = base or declarative_base()
    return base
def get_db():
    """Return the shared SQLAlchemy instance (None until setup_db runs)."""
    return db
def get_session():
    """Return the shared db session (None until setup_db runs)."""
    return session
def setup_db(basedir, app):
    """Configure SQLAlchemy on *app* and initialize the shared db/session.

    :param basedir: directory holding the default SQLite database file
    :param app: the Flask application instance to configure
    :returns: the flask_sqlalchemy.SQLAlchemy instance

    Note: the diff rendering left duplicated old/new lines in this block
    (two globals, two URI assignments, an unreachable second return); this
    is the reconstructed post-change version.
    """
    global db, session
    # Configure the SQLAlchemy part of the app instance
    # NOTE(review): SQLALCHEMY_ECHO=True logs every SQL statement — confirm
    # this is intended outside of development.
    app.config['SQLALCHEMY_ECHO'] = True
    app.config['SQLALCHEMY_DATABASE_URI'] = os.getenv('SQLALCHEMY_DATABASE_URI', 'sqlite:////' + os.path.join(basedir, 'models.db'))
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
    # Create the SQLAlchemy db instance and publish the shared session
    db = SQLAlchemy(app)
    session = db.session
    return db

Просмотреть файл

@ -1,13 +0,0 @@
# local modules
from connexion.exceptions import OAuthProblem
from flask import request
import seattle_flu_incidence_mapper.config as config
# Get the application instance
app = config.connex_app
# Read the swagger.yml file to configure the endpoints
app.add_api("swagger.yml")
if __name__ == "__main__":
app.run(debug=True)

Просмотреть файл

@ -51,7 +51,7 @@ setup(
'marshmallow-sqlalchemy',
"docker",
'psycopg2-binary',
''
'Flask-Migrate'
],
extras_require={

Просмотреть файл

@ -5,7 +5,7 @@ from flask.testing import FlaskClient
from werkzeug.utils import cached_property
from werkzeug.wrappers import BaseResponse
os.environ['DEBUG']='1'
from seattle_flu_incidence_mapper.server import app
from seattle_flu_incidence_mapper.app import app
BASE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'fixtures'))