Add db migration (will help with model migration)

Update image to support db migration
Add migration to add model_keys to help eliminate duplicates through indexing
This commit is contained in:
Clinton Collins 2019-05-16 15:21:06 -07:00
Родитель d4529a42a5
Коммит ad24a414b7
15 изменённых файлов: 381 добавлений и 51 удалений

Просмотреть файл

@ -35,6 +35,8 @@ VOLUME /model_data
# Finish the setup
ADD docker_scripts/nginx.conf /etc/nginx/nginx.conf
ADD docker_scripts/entrypoint.sh docker_scripts/wsgi.ini ./
# Add migrations
ADD migrations ./migrations
# Add the rest of our library here
ADD seattle_flu_incidence_mapper ./seattle_flu_incidence_mapper

Просмотреть файл

@ -1,5 +1,5 @@
[uwsgi]
module = seattle_flu_incidence_mapper.server:app
module = seattle_flu_incidence_mapper.app:app
master = true
# Set the processes to total CPU count
processes = %k

Просмотреть файл

@ -0,0 +1 @@
Generic single-database configuration.

Просмотреть файл

@ -0,0 +1,45 @@
# A generic, single database configuration.
[alembic]
# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
qualname =
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S

Просмотреть файл

@ -0,0 +1,95 @@
from __future__ import with_statement
import logging
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config
# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(config.config_file_name)
logger = logging.getLogger('alembic.env')
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
from flask import current_app
config.set_main_option('sqlalchemy.url',
current_app.config.get('SQLALCHEMY_DATABASE_URI'))
target_metadata = current_app.extensions['migrate'].db.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    Configures the Alembic context with only a database URL — no Engine
    is built, so no DBAPI needs to be importable. Calls to
    context.execute() emit the generated SQL to the script output
    instead of executing it.
    """
    db_url = config.get_main_option("sqlalchemy.url")
    context.configure(url=db_url, target_metadata=target_metadata, literal_binds=True)
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online():
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.
    """
    # this callback is used to prevent an auto-migration from being generated
    # when there are no changes to the schema
    # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
    def process_revision_directives(context, revision, directives):
        # Only act on `alembic revision --autogenerate` invocations.
        if getattr(config.cmd_opts, 'autogenerate', False):
            script = directives[0]
            if script.upgrade_ops.is_empty():
                # Discard the pending revision entirely rather than
                # writing an empty migration file.
                directives[:] = []
                logger.info('No changes in schema detected.')

    # Build the Engine from the [alembic]/sqlalchemy.* config section;
    # NullPool ensures no connections linger after the migration run.
    connectable = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix='sqlalchemy.',
        poolclass=pool.NullPool,
    )
    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            process_revision_directives=process_revision_directives,
            # Forward whatever extra options Flask-Migrate registered at
            # Migrate() setup time.
            **current_app.extensions['migrate'].configure_args
        )
        with context.begin_transaction():
            context.run_migrations()
# Entry point: Alembic executes this module and we pick the mode it
# requested on the command line.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()

Просмотреть файл

@ -0,0 +1,24 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}
def upgrade():
${upgrades if upgrades else "pass"}
def downgrade():
${downgrades if downgrades else "pass"}

Просмотреть файл

@ -0,0 +1,101 @@
"""empty message
Revision ID: 2c92fa01c7ef
Revises: 7d4c7414c89e
Create Date: 2019-05-16 12:39:40.321616
"""
import hashlib
from datetime import datetime
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
from sqlalchemy import Column, String, DateTime
from sqlalchemy.orm import sessionmaker
from seattle_flu_incidence_mapper.orm_config import get_declarative_base
connection = op.get_bind()
Session = sessionmaker()
revision = '2c92fa01c7ef'
down_revision = '7d4c7414c89e'
branch_labels = None
depends_on = None
old_model_name = 'pathogen_model'
new_model_name = 'generic_model'
class PathogenModel(get_declarative_base()):
    """Minimal ORM mapping of the legacy 'pathogen_model' table.

    Declared locally so this migration does not depend on the current
    application models, which may drift ahead of this revision.
    """
    __tablename__ = 'pathogen_model'
    id = Column(String, primary_key=True)
    name = Column(String)
    query_str = Column(String)
    model_type = Column(String)
    rds_key = Column(String)
    # Content hash of the model file; filled in by populate_model_keys().
    model_key = Column(String)
    created = Column(DateTime, primary_key=True, default=datetime.utcnow)
def upgrade():
    """Migrate 'pathogen_model' to 'generic_model' with a (id, model_key) PK.

    SQLite cannot drop constraints in place, so instead of altering the
    existing table we create the replacement table, back-fill model_key on
    the old rows, copy them across, and drop the old table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # create our new table
    op.create_table(new_model_name,
                    sa.Column('id', sa.VARCHAR(), nullable=False, primary_key=True),
                    sa.Column('name', sa.VARCHAR(), nullable=True),
                    sa.Column('query_str', sa.VARCHAR(), nullable=True),
                    sa.Column('model_type', sa.VARCHAR(), nullable=True),
                    sa.Column('rds_key', sa.VARCHAR(), nullable=True),
                    sa.Column('model_key', sa.VARCHAR(), nullable=False, primary_key=True),
                    sa.Column('created', sa.DATETIME(), nullable=False),
                    sa.PrimaryKeyConstraint('id', 'model_key')
                    )
    # Add model_key to the OLD table first: populate_model_keys() writes
    # model.model_key through the ORM and the INSERT...SELECT below reads
    # the column from pathogen_model, so it must exist before either runs.
    with op.batch_alter_table(old_model_name) as batch_op:
        batch_op.add_column(Column('model_key', String()))
    populate_model_keys()
    old_columns = ['id', 'name', 'query_str', 'model_type', 'rds_key', 'model_key', 'created']
    col_str = ",".join(old_columns)
    # Bulk-copy the surviving (de-duplicated) rows into the new table.
    op.execute(f'INSERT INTO {new_model_name} ({col_str}) SELECT {col_str} FROM {old_model_name};')
    op.drop_table(old_model_name)
    # ### end Alembic commands ###
def downgrade():
    """Reverse of upgrade(): restore the old table name and schema.

    Rename generic_model back to pathogen_model, then drop only the
    column this revision introduced.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.rename_table(new_model_name, old_model_name)
    with op.batch_alter_table(old_model_name) as batch_op:
        # NOTE: 'rds_key' already exists in the previous revision
        # (7d4c7414c89e), so it must NOT be dropped here or the downgraded
        # schema would diverge from that revision.
        batch_op.drop_column('model_key')
    # ### end Alembic commands ###
def populate_model_keys():
    """Back-fill model_key (a content hash of each model file) and delete
    duplicate rows.

    Rows are walked newest-first, so for each (id, model_key) combination
    the most recent row is kept and older duplicates are deleted.
    """
    from seattle_flu_incidence_mapper.model_store import get_model_file

    # NOTE(review): bind via op.get_bind() here rather than the module-level
    # `connection` global — that global is evaluated at import time, when the
    # Alembic op proxy may not yet be configured.
    session = Session(bind=op.get_bind())
    seen_keys = set()
    # let's calculate our model keys and rds keys
    for model in session.query(PathogenModel).order_by(PathogenModel.created.desc()).all():
        modelfile = get_model_file(model.id)
        # Hash the raw bytes: model files (e.g. RDS blobs) are not
        # guaranteed to be valid UTF-8 text, so text-mode reading could fail.
        with open(modelfile, 'rb') as mf:
            model_key = hashlib.md5(mf.read()).hexdigest()
        model.model_key = model_key
        combo_id = model.id + model_key
        if combo_id in seen_keys:
            # Older duplicate of a row we already kept — remove it.
            session.delete(model)
        else:
            seen_keys.add(combo_id)
            session.add(model)
    session.flush()
    session.commit()

Просмотреть файл

@ -0,0 +1,40 @@
"""empty message
Revision ID: 7d4c7414c89e
Revises:
Create Date: 2019-05-16 12:35:09.965531
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
from sqlalchemy.engine import reflection
revision = '7d4c7414c89e'
down_revision = None
branch_labels = None
depends_on = None
def upgrade():
    """Create the initial 'pathogen_model' table unless it already exists.

    The existence check keeps this migration safe to run against databases
    that were previously bootstrapped outside of Alembic.
    """
    bind = op.get_bind()
    existing_tables = sa.inspect(bind.engine).get_table_names()
    if 'pathogen_model' in existing_tables:
        return
    op.create_table(
        'pathogen_model',
        sa.Column('id', sa.VARCHAR(), nullable=False, primary_key=True),
        sa.Column('name', sa.VARCHAR(), nullable=True),
        sa.Column('query_str', sa.VARCHAR(), nullable=True),
        sa.Column('model_type', sa.VARCHAR(), nullable=True),
        sa.Column('rds_key', sa.VARCHAR(), nullable=True),
        sa.Column('created', sa.DATETIME(), nullable=False, primary_key=True),
        sa.PrimaryKeyConstraint('id', 'created'),
    )
def downgrade():
    """Revert the initial revision.

    This is the first migration, so there is no earlier schema to
    restore — intentionally a no-op.
    """
    # This is our first so nothing to do after this
    pass

Просмотреть файл

@ -0,0 +1,7 @@
# Get the application instance
from seattle_flu_incidence_mapper.config import app
if __name__ == "__main__":
    # Dispatch to the app's CLI group (e.g. `python app.py db upgrade`).
    # NOTE(review): app.cli is Flask's click command group — confirm direct
    # invocation (rather than app.run) is the intended entry point here.
    app.cli()

Просмотреть файл

@ -2,6 +2,7 @@ import json
import os
import connexion
from flask import Response
from flask_migrate import Migrate
from sqlalchemy.orm.exc import NoResultFound
from seattle_flu_incidence_mapper.orm_config import setup_db
@ -9,26 +10,6 @@ from seattle_flu_incidence_mapper.utils import set_marshmallow, ModelExecutionEx
basedir = os.path.abspath(os.path.dirname(__file__))
true_vals = ['1', 'y', 'yes', 't', 'true']
# Create the Connexion application instance
connex_app = connexion.App("seattle_flu_incidence_mapper.config", specification_dir=os.path.join(basedir, 'swagger'))
# Get the underlying Flask app instance
app = connex_app.app
app.config['WORKER_IMAGE'] = os.environ.get('WORKER_IMAGE', 'idm-docker-production.packages.idmod.org/sfim-worker:latest')
app.config['MODEL_STORE'] = os.environ.get('MODEL_STORE', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['MODEL_HOST_PATH'] = os.environ.get('MODEL_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['WORKER_JOB_HOST_PATH'] = os.environ.get('WORKER_JOB_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
app.config['MODEL_JOB_PATH'] = os.environ.get('MODEL_JOB_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
db = setup_db(basedir, app)
set_marshmallow(app)
# DO NOT MOVE this line. The order matters here
# we need to init our db before loading our models
from seattle_flu_incidence_mapper.models import *
if os.environ.get('DEBUG', '0').lower() in true_vals or os.environ.get('CREATE_DB', '0').lower() in true_vals:
db.create_all()
def sqlalchemy_error_handler(exception):
@ -41,5 +22,32 @@ def model_exec_error_handler(exception):
status=500, mimetype="application/json")
# Create the Connexion application instance
connex_app = connexion.App("seattle_flu_incidence_mapper.config", specification_dir=os.path.join(basedir, 'swagger'))
# Get the underlying Flask app instance
app = connex_app.app
app.config['WORKER_IMAGE'] = os.environ.get('WORKER_IMAGE', 'idm-docker-production.packages.idmod.org/sfim-worker:latest')
app.config['MODEL_STORE'] = os.environ.get('MODEL_STORE', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['MODEL_HOST_PATH'] = os.environ.get('MODEL_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_model_store")))
app.config['WORKER_JOB_HOST_PATH'] = os.environ.get('WORKER_JOB_HOST_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
app.config['MODEL_JOB_PATH'] = os.environ.get('MODEL_JOB_PATH', os.path.abspath(os.path.join(os.getcwd(), "../../test_jobs")))
db = setup_db(basedir, app)
migrate = Migrate(app, db)
set_marshmallow(app)
# DO NOT MOVE this line. The order matters here
# we need to init our db before loading our models
from seattle_flu_incidence_mapper.models import *
if os.environ.get('DEBUG', '0').lower() in true_vals or os.environ.get('CREATE_DB', '0').lower() in true_vals:
db.create_all()
connex_app.add_error_handler(ModelExecutionException, model_exec_error_handler)
connex_app.add_error_handler(NoResultFound, sqlalchemy_error_handler)
connex_app.add_error_handler(NoResultFound, sqlalchemy_error_handler)
# Read the swagger.yml file to configure the endpoints
connex_app.add_api("swagger.yml")

Просмотреть файл

@ -1,20 +1,23 @@
from datetime import datetime
from seattle_flu_incidence_mapper.orm_config import db
from sqlalchemy import String, Column, DateTime
from seattle_flu_incidence_mapper.orm_config import get_session, get_declarative_base
from seattle_flu_incidence_mapper.utils import ma
base = get_declarative_base()
class GenericModel(db.Model):
__tablename__ = 'pathogen_model'
id = db.Column(db.String, primary_key=True)
name = db.Column(db.String)
query_str = db.Column(db.String)
model_type = db.Column(db.String)
rds_key = db.Column(db.String)
created = db.Column(db.DateTime, primary_key=True, default=datetime.utcnow)
class GenericModel(base):
    """ORM model for a stored model artifact.

    Replaces the old 'pathogen_model' table; the composite primary key
    (id, model_key) lets multiple versions of the same model id coexist
    while preventing exact-duplicate rows via the key index.
    """
    __tablename__ = 'generic_model'
    id = Column(String, primary_key=True)
    name = Column(String)
    query_str = Column(String)
    model_type = Column(String)
    # Content-hash key; part of the composite primary key.
    model_key = Column(String, primary_key=True)
    rds_key = Column(String)
    created = Column(DateTime, default=datetime.utcnow)
class GenericModelSchema(ma.ModelSchema):
    """Marshmallow (de)serialization schema generated from GenericModel."""
    class Meta:
        model = GenericModel
        # NOTE(review): get_session is assigned as a callable, not a live
        # session object — confirm marshmallow-sqlalchemy accepts a session
        # factory here; otherwise this should be get_session().
        sqla_session = get_session

Просмотреть файл

@ -1,22 +1,39 @@
import functools
import os
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.ext.declarative import declarative_base
# Module-level singletons, initialized lazily:
#   db      — the flask_sqlalchemy.SQLAlchemy instance, set by setup_db()
#   session — db.session, set by setup_db()
#   base    — declarative base, created on first get_declarative_base() call
db = None
session = None
base = None
def get_declarative_base():
    """Return the process-wide declarative base, creating it on first use."""
    global base
    base = base or declarative_base()
    return base
def get_db():
    """Return the shared SQLAlchemy instance (None until setup_db runs)."""
    return db
def get_session():
    """Return the shared db session (None until setup_db runs)."""
    return session
def setup_db(basedir, app):
    """Configure SQLAlchemy on *app* and initialize the shared db/session.

    :param basedir: directory holding the default SQLite database file
    :param app: the Flask application instance to configure
    :returns: the flask_sqlalchemy.SQLAlchemy instance

    Note: the diff rendering left duplicated old/new lines in this block
    (two globals, two URI assignments, an unreachable second return); this
    is the reconstructed post-change version.
    """
    global db, session
    # Configure the SQLAlchemy part of the app instance
    # NOTE(review): SQLALCHEMY_ECHO=True logs every SQL statement — confirm
    # this is intended outside of development.
    app.config['SQLALCHEMY_ECHO'] = True
    app.config['SQLALCHEMY_DATABASE_URI'] = os.getenv('SQLALCHEMY_DATABASE_URI', 'sqlite:////' + os.path.join(basedir, 'models.db'))
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
    # Create the SQLAlchemy db instance and publish the shared session
    db = SQLAlchemy(app)
    session = db.session
    return db

Просмотреть файл

@ -1,13 +0,0 @@
# local modules
from connexion.exceptions import OAuthProblem
from flask import request
import seattle_flu_incidence_mapper.config as config
# Get the application instance
app = config.connex_app
# Read the swagger.yml file to configure the endpoints
app.add_api("swagger.yml")
if __name__ == "__main__":
app.run(debug=True)

Просмотреть файл

@ -51,7 +51,7 @@ setup(
'marshmallow-sqlalchemy',
"docker",
'psycopg2-binary',
''
'Flask-Migrate'
],
extras_require={

Просмотреть файл

@ -5,7 +5,7 @@ from flask.testing import FlaskClient
from werkzeug.utils import cached_property
from werkzeug.wrappers import BaseResponse
os.environ['DEBUG']='1'
from seattle_flu_incidence_mapper.server import app
from seattle_flu_incidence_mapper.app import app
BASE_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'fixtures'))