Bug 290402: Functions to support reading-in a Schema object from the database

Patch by Max Kanat-Alexander <mkanat@bugzilla.org> r=Tomas.Kopal, a=justdave
2005-04-17 07:22:42 +00:00 · 2005-04-17 07:22:42 +00:00 · ad70ac135e
--- a/webtools/bugzilla/Bugzilla/DB.pm
+++ b/webtools/bugzilla/Bugzilla/DB.pm
@ -425,6 +425,40 @@ sub bz_add_index {
    }
 }

+sub bz_add_table {
+    my ($self, $name) = @_;
+
+    my $table_exists = $self->bz_table_info($name);
+
+    if (!$table_exists) {
+        $self->_bz_add_table_raw($name);
+        $self->_bz_real_schema->add_table($name,
+            $self->_bz_schema->get_table_abstract($name));
+        $self->_bz_store_real_schema;
+    }
+}
+
+# _bz_add_table_raw($name) - Private
+#
+# Description: A helper function for bz_add_table.
+#              Creates a table in the database without
+#              updating any Schema object. Generally
+#              should only be called by bz_add_table and by
+#              _bz_init_schema_storage. Used when you don't
+#              yet have a Schema object but you need to
+#              add a table, for some reason.
+# Params:      $name - The name of the table you're creating. 
+#                  The definition for the table is pulled from 
+#                  _bz_schema.
+# Returns:     nothing
+#
+sub _bz_add_table_raw {
+    my ($self, $name) = @_;
+    my @statements = $self->_bz_schema->get_table_ddl($name);
+    print "Adding new table $name ...\n";
+    $self->do($_) foreach (@statements);
+}
+
 # XXX - Need to make this cross-db compatible
 # XXX - This shouldn't print stuff to stdout
 sub bz_change_field_type ($$$) {
@ -579,9 +613,23 @@ sub _bz_schema {
    return $self->{private_bz_schema};
 }

+# _bz_get_initial_schema()
+#
+# Description: A protected method, intended for use only by Bugzilla::DB
+#              and subclasses. Used to get the initial Schema that will
+#              be wirtten to disk for _bz_init_schema_storage. You probably
+#              want to use _bz_schema or _bz_real_schema instead of this
+#              method.
+# Params:      none
+# Returns:     A Schema object that can be serialized and written to disk
+#              for _bz_init_schema_storage.
+sub _bz_get_initial_schema {
+    my ($self) = @_;
+    return $self->_bz_schema->get_empty_schema();
+}
+
 sub bz_column_info {
    my ($self, $table, $column) = @_;
-
    return $self->_bz_real_schema->get_column_abstract($table, $column);
 }

@ -595,6 +643,11 @@ sub bz_index_info {
    return $index_def;
 }

+sub bz_table_info {
+    my ($self, $table) = @_;
+    return $self->_bz_real_schema->get_table_abstract($table);
+}
+

 # XXX - Needs to be made cross-db compatible.
 sub bz_get_field_def ($$) {
@ -837,7 +890,8 @@ sub _bz_real_schema {
    my ($data, $version) = $self->selectrow_array(
        "SELECT schema_data, version FROM bz_schema");

-    # XXX - Should I do the undef check here instead of in checksetup?
+    (die "_bz_real_schema tried to read the bz_schema table but it's empty!")
+        if !$data;

    $self->{private_real_schema} = 
        $self->_bz_schema->deserialize_abstract($data, $version);
@ -870,11 +924,13 @@ sub _bz_store_real_schema {
    # that we read from the database. So we use the actual hash
    # member instead of the subroutine call. If the hash
    # member is not defined, we will (and should) fail.
-    my $store_me = $self->{private_real_schema}->serialize_abstract();
+    my $update_schema = $self->{private_real_schema};
+    my $store_me = $update_schema->serialize_abstract();
+    my $schema_version = $update_schema->SCHEMA_VERSION;
    my $sth = $self->prepare("UPDATE bz_schema 
                                 SET schema_data = ?, version = ?");
    $sth->bind_param(1, $store_me, $self->BLOB_TYPE);
-    $sth->bind_param(2, Bugzilla::DB::Schema::SCHEMA_VERSION);
+    $sth->bind_param(2, $schema_version);
    $sth->execute();
 }

@ -910,6 +966,7 @@ Bugzilla::DB - Database access routines, using L<DBI>
  # Schema Modification
  $dbh->bz_add_column($table, $name, \%definition, $init_value);
  $dbh->bz_add_index($table, $name, $definition);
+  $dbh->bz_add_table($name);
  $dbh->bz_drop_index($table, $name);
  $dbh->bz_alter_column($table, $name, \%new_def);
  $dbh->bz_drop_column($table, $column);
@ -1247,6 +1304,18 @@ C<Bugzilla::DB::Schema::ABSTRACT_SCHEMA>.
                            Either a hashref or an arrayref.
 Returns:     nothing

+=item C<bz_add_table($name)>
+
+ Description: Creates a new table in the database, based on the
+              definition for that table in the abstract schema.
+              Note that unlike the other 'add' functions, this does
+              not take a definition, but always creates the table
+              as it exists in the ABSTRACT_SCHEMA.
+              If a table with that name already exists, then this
+              function returns silently.
+ Params:      $name - The name of the table you want to create.
+ Returns:     nothing
+
 =item C<bz_drop_index($table, $name)>

 Description: Removes an index from the database. Prints out a brief
--- a/webtools/bugzilla/Bugzilla/DB/Mysql.pm
+++ b/webtools/bugzilla/Bugzilla/DB/Mysql.pm
@ -465,6 +465,33 @@ backwards-compatibility anyway, for versions of Bugzilla before 2.20.

 =over 4

+=item C<bz_column_info_real($table, $column)>
+
+ Description: Returns an abstract column definition for a column
+              as it actually exists on disk in the database.
+ Params:      $table - The name of the table the column is on.
+              $column - The name of the column you want info about.
+ Returns:     An abstract column definition.
+              If the column does not exist, returns undef.
+
+=cut
+
+sub bz_column_info_real {
+    my ($self, $table, $column) = @_;
+
+    # DBD::mysql does not support selecting a specific column,
+    # so we have to get all the columns on the table and find 
+    # the one we want.
+    my $info_sth = $self->column_info(undef, undef, $table, '%');
+    my $all_cols = $info_sth->fetchall_hashref("COLUMN_NAME");
+    my $col_data = $all_cols->{$column};
+
+    if (!defined $col_data) {
+        return undef;
+    }
+    return $self->_bz_schema->column_info_to_column($col_data);
+}
+
 =item C<bz_index_info_real($table, $index)>

 Description: Returns information about an index on a table in the database.
@ -519,6 +546,69 @@ sub bz_index_info_real {
    return $retval;
 }

+=item C<bz_index_list_real($table)>
+
+ Description: Returns a list of index names on a table in 
+              the database, as it actually exists on disk.
+ Params:      $table - The name of the table you want info about.
+ Returns:     An array of index names.
+
+=cut
+
+sub bz_index_list_real {
+    my ($self, $table) = @_;
+    my $sth = $self->prepare("SHOW INDEX FROM $table");
+    # Column 3 of a SHOW INDEX statement contains the name of the index.
+    return @{ $self->selectcol_arrayref($sth, {Columns => [3]}) };
+}
+
+#####################################################################
+# MySQL-Specific "Schema Builder"
+#####################################################################
+
+=back
+
+=head 1 MYSQL-SPECIFIC "SCHEMA BUILDER"
+
+MySQL needs to be able to read in a legacy database (from before 
+Schema existed) and create a Schema object out of it. That's what
+this code does.
+
+=cut
+
+# This sub itself is actually written generically, but the subroutines
+# that it depends on are database-specific. In particular, the
+# bz_column_info_real function would be very difficult to create
+# properly for any other DB besides MySQL.
+sub _bz_build_schema_from_disk {
+    my ($self) = @_;
+
+    print "Building Schema object from database...\n";
+
+    my $schema = $self->_bz_schema->get_empty_schema();
+
+    my @tables = $self->bz_table_list_real();
+    foreach my $table (@tables) {
+        $schema->add_table($table);
+        my @columns = $self->bz_table_columns_real($table);
+        foreach my $column (@columns) {
+            my $type_info = $self->bz_column_info_real($table, $column);
+            $schema->set_column($table, $column, $type_info);
+        }
+
+        my @indexes = $self->bz_index_list_real($table);
+        foreach my $index (@indexes) {
+            unless ($index eq 'PRIMARY') {
+                my $index_info = $self->bz_index_info_real($table, $index);
+                ($index_info = $index_info->{FIELDS}) 
+                    if (!$index_info->{TYPE});
+                $schema->set_index($table, $index, $index_info);
+            }
+        }
+    }
+
+    return $schema;
+}
 1;

 __END__
--- a/webtools/bugzilla/Bugzilla/DB/Schema.pm
+++ b/webtools/bugzilla/Bugzilla/DB/Schema.pm
@ -897,7 +897,8 @@ use constant ABSTRACT_SCHEMA => {

    whine_queries => {
        FIELDS => [
-            id            => {TYPE => 'MEDIUMSERIAL', PRIMARYKEY => 1},
+            id            => {TYPE => 'MEDIUMSERIAL', PRIMARYKEY => 1,
+                              NOTNULL => 1},
            eventid       => {TYPE => 'INT3', NOTNULL => 1},
            query_name    => {TYPE => 'varchar(64)', NOTNULL => 1,
                              DEFAULT => "''"},
@ -914,7 +915,8 @@ use constant ABSTRACT_SCHEMA => {

    whine_schedules => {
        FIELDS => [
-            id          => {TYPE => 'MEDIUMSERIAL', PRIMARYKEY => 1},
+            id          => {TYPE => 'MEDIUMSERIAL', PRIMARYKEY => 1,
+                            NOTNULL => 1},
            eventid     => {TYPE => 'INT3', NOTNULL => 1},
            run_day     => {TYPE => 'varchar(32)'},
            run_time    => {TYPE => 'varchar(32)'},
@ -930,7 +932,8 @@ use constant ABSTRACT_SCHEMA => {

    whine_events => {
        FIELDS => [
-            id           => {TYPE => 'MEDIUMSERIAL', PRIMARYKEY => 1},
+            id           => {TYPE => 'MEDIUMSERIAL', PRIMARYKEY => 1,
+                             NOTNULL => 1},
            owner_userid => {TYPE => 'INT3', NOTNULL => 1},
            subject      => {TYPE => 'varchar(128)'},
            body         => {TYPE => 'MEDIUMTEXT'},
@ -1211,7 +1214,7 @@ sub get_column {
    }
    return undef;
 } #eosub--get_column
-#--------------------------------------------------------------------------
+
 sub get_table_list {

 =item C<get_table_list>
@ -1584,7 +1587,7 @@ sub get_column_abstract {

    # Prevent a possible dereferencing of an undef hash, if the
    # table doesn't exist.
-    if (exists $self->{abstract_schema}->{$table}) {
+    if ($self->get_table_abstract($table)) {
        my %fields = (@{ $self->{abstract_schema}{$table}{FIELDS} });
        return $fields{$column};
    }
@ -1616,6 +1619,47 @@ sub get_index_abstract {
    return undef;
 }

+=item C<get_table_abstract($table)>
+
+ Description: Gets the abstract definition for a table in this Schema
+              object.
+ Params:      $table - The name of the table you want a definition for.
+ Returns:     An abstract table definition, or undef if the table doesn't
+              exist.
+
+=cut
+
+sub get_table_abstract {
+    my ($self, $table) = @_;
+    return $self->{abstract_schema}->{$table};
+}
+
+=item C<add_table($name, \%definition)>
+
+ Description: Creates a new table in this Schema object.
+              If you do not specify a definition, we will
+              simply create an empty table.
+ Params:      $name - The name for the new table.
+              \%definition (optional) - An abstract definition for
+                  the new table.
+ Returns:     nothing
+
+=cut
+sub add_table {
+    my ($self, $name, $definition) = @_;
+    (die "Table already exists: $name")
+        if exists $self->{abstract_schema}->{$name};
+    if ($definition) {
+        $self->{abstract_schema}->{$name} = dclone($definition);
+        $self->{schema} = dclone($self->{abstract_schema});
+        $self->_adjust_schema();
+    }
+    else {
+        $self->{abstract_schema}->{$name} = {FIELDS => []};
+        $self->{schema}->{$name}          = {FIELDS => []};
+    }
+}
+
 sub delete_column {

 =item C<delete_column($table, $column)>
@ -1705,6 +1749,11 @@ sub set_index {

    my ($self, $table, $name, $definition) = @_;

+    if ( exists $self->{abstract_schema}{$table}
+         && !exists $self->{abstract_schema}{$table}{INDEXES} ) {
+        $self->{abstract_schema}{$table}{INDEXES} = [];
+    }
+
    my $indexes = $self->{abstract_schema}{$table}{INDEXES};
    $self->_set_object($table, $name, $definition, $indexes);
 }
@ -1839,6 +1888,31 @@ sub deserialize_abstract {
    return $class->new(undef, $thawed_hash);
 }

+#####################################################################
+# Class Methods
+#####################################################################
+
+=back
+
+=head1 CLASS METHODS
+
+These methods are generally called on the class instead of on a specific
+object.
+
+=item C<get_empty_schema()>
+
+ Description: Returns a Schema that has no tables. In effect, this
+              Schema is totally "empty."
+ Params:      none
+ Returns:     A "empty" Schema object.
+
+=cut
+
+sub get_empty_schema {
+    my ($class) = @_;
+    return $class->deserialize_abstract(freeze({}));
+}
+
 1;
 __END__

--- a/webtools/bugzilla/Bugzilla/DB/Schema/Mysql.pm
+++ b/webtools/bugzilla/Bugzilla/DB/Schema/Mysql.pm
@ -33,6 +33,57 @@ use Bugzilla::Error;

 use base qw(Bugzilla::DB::Schema);

+# This is for column_info_to_column, to know when a tinyint is a 
+# boolean and when it's really a tinyint. This only has to be accurate
+# up to and through 2.19.3, because that's the only time we need
+# column_info_to_column.
+#
+# This is basically a hash of tables/columns, with one entry for each column
+# that should be interpreted as a BOOLEAN instead of as an INT1 when
+# reading in the Schema from the disk. The values are discarded; I just
+# used "1" for simplicity.
+use constant BOOLEAN_MAP => {
+    bugs           => {everconfirmed => 1, reporter_accessible => 1,
+                       cclist_accessible => 1, qacontact_accessible => 1,
+                       assignee_accessible => 1},
+    longdescs      => {isprivate => 1, already_wrapped => 1},
+    attachments    => {ispatch => 1, isobsolete => 1, isprivate => 1},
+    flags          => {is_active => 1},
+    flagtypes      => {is_active => 1, is_requestable => 1, 
+                       is_requesteeble => 1, is_multiplicable => 1},
+    fielddefs      => {mailhead => 1, obsolete => 1},
+    bug_status     => {isactive => 1},
+    resolution     => {isactive => 1},
+    bug_severity   => {isactive => 1},
+    priority       => {isactive => 1},
+    rep_platform   => {isactive => 1},
+    op_sys         => {isactive => 1},
+    profiles       => {mybugslink => 1, newemailtech => 1},
+    namedqueries   => {linkinfooter => 1, watchfordiffs => 1},
+    groups         => {isbuggroup => 1, isactive => 1},
+    group_control_map => {entry => 1, membercontrol => 1, othercontrol => 1,
+                          canedit => 1},
+    group_group_map => {isbless => 1},
+    user_group_map => {isbless => 1, isderived => 1},
+    products       => {disallownew => 1},
+    series         => {public => 1},
+    whine_queries  => {onemailperbug => 1},
+    quips          => {approved => 1},
+    setting        => {is_enabled => 1}
+};
+
+# Maps the db_specific hash backwards, for use in column_info_to_column.
+use constant REVERSE_MAPPING => {
+    # Boolean and the SERIAL fields are handled in column_info_to_column,
+    # and so don't have an entry here.
+    TINYINT   => 'INT1',
+    SMALLINT  => 'INT2',
+    MEDIUMINT => 'INT3',
+    INTEGER   => 'INT4',
+    # All the other types have the same name in their abstract version
+    # as in their db-specific version, so no reverse mapping is needed.
+};
+
 #------------------------------------------------------------------------------
 sub _initialize {

@ -110,6 +161,123 @@ sub get_drop_index_ddl {
    return ("DROP INDEX \`$name\` ON $table");
 }

+# Converts a DBI column_info output to an abstract column definition.
+# Expects to only be called by Bugzila::DB::Mysql::_bz_build_schema_from_disk,
+# although there's a chance that it will also work properly if called
+# elsewhere.
+sub column_info_to_column {
+    my ($self, $column_info) = @_;
+
+    # Unfortunately, we have to break Schema's normal "no database"
+    # barrier a few times in this function.
+    my $dbh = Bugzilla->dbh;
+
+    my $table = $column_info->{TABLE_NAME};
+    my $col_name = $column_info->{COLUMN_NAME};
+
+    my $column = {};
+
+    ($column->{NOTNULL} = 1) if $column_info->{NULLABLE} == 0;
+
+    if ($column_info->{mysql_is_pri_key}) {
+        # In MySQL, if a table has no PK, but it has a UNIQUE index,
+        # that index will show up as the PK. So we have to eliminate
+        # that possibility.
+        # Unfortunately, the only way to definitely solve this is
+        # to break Schema's standard of not touching the live database
+        # and check if the index called PRIMARY is on that field.
+        my $pri_index = $dbh->bz_index_info_real($table, 'PRIMARY');
+        if ( $pri_index && grep($_ eq $col_name, @{$pri_index->{FIELDS}}) ) {
+            $column->{PRIMARYKEY} = 1;
+        }
+    }
+
+    # MySQL frequently defines a default for a field even when we
+    # didn't explicitly set one. So we have to have some special
+    # hacks to determine whether or not we should actually put
+    # a default in the abstract schema for this field.
+    if (defined $column_info->{COLUMN_DEF}) {
+        # The defaults that MySQL inputs automatically are usually
+        # something that would be considered "false" by perl, either
+        # a 0 or an empty string. (Except for ddatetime and decimal
+        # fields, which have their own special auto-defaults.)
+        #
+        # Here's how we handle this: If it exists in the schema
+        # without a default, then we don't use the default. If it
+        # doesn't exist in the schema, then we're either going to
+        # be dropping it soon, or it's a custom end-user column, in which
+        # case having a bogus default won't harm anything.
+        my $schema_column = $self->get_column($table, $col_name);
+        unless ( (!$column_info->{COLUMN_DEF} 
+                  || $column_info->{COLUMN_DEF} eq '0000-00-00 00:00:00'
+                  || $column_info->{COLUMN_DEF} eq '0.00')
+                && $schema_column 
+                && !exists $schema_column->{DEFAULT}) {
+            
+            my $default = $column_info->{COLUMN_DEF};
+            # Schema uses '0' for the defaults for decimal fields. 
+            $default = 0 if $default =~ /^0\.0+$/;
+            # If we're not a number, we're a string and need to be
+            # quoted.
+            $default = $dbh->quote($default) if !($default =~ /^(-)?(\d+)(.\d+)?$/);
+            $column->{DEFAULT} = $default;
+        }
+    }
+
+    my $type = $column_info->{TYPE_NAME};
+
+    # Certain types of columns need the size/precision appended.
+    if ($type =~ /CHAR$/ || $type eq 'DECIMAL') {
+        # This is nicely lowercase and has the size/precision appended.
+        $type = $column_info->{mysql_type_name};
+    }
+
+    # If we're a tinyint, we could be either a BOOLEAN or an INT1.
+    # Only the BOOLEAN_MAP knows the difference.
+    elsif ($type eq 'TINYINT' && exists BOOLEAN_MAP->{$table}
+           && exists BOOLEAN_MAP->{$table}->{$col_name}) {
+        $type = 'BOOLEAN';
+        if (exists $column->{DEFAULT}) {
+            $column->{DEFAULT} = $column->{DEFAULT} ? 'TRUE' : 'FALSE';
+        }
+    }
+
+    # We also need to check if we're an auto_increment field.
+    elsif ($type =~ /INT/) {
+        # Unfortunately, the only way to do this in DBI is to query the
+        # database, so we have to break the rule here that Schema normally
+        # doesn't touch the live DB.
+        my $ref_sth = $dbh->prepare(
+            "SELECT $col_name FROM $table LIMIT 1");
+        $ref_sth->execute;
+        if ($ref_sth->{mysql_is_auto_increment}->[0]) {
+            if ($type eq 'MEDIUMINT') {
+                $type = 'MEDIUMSERIAL';
+            }
+            elsif ($type eq 'SMALLINT') {
+                $type = 'SMALLSERIAL';
+            } 
+            else {
+                $type = 'INTSERIAL';
+            }
+        }
+        $ref_sth->finish;
+
+    }
+
+    # For all other db-specific types, check if they exist in 
+    # REVERSE_MAPPING and use the type found there.
+    if (exists REVERSE_MAPPING->{$type}) {
+        $type = REVERSE_MAPPING->{$type};
+    }
+
+    $column->{TYPE} = $type;
+
+    #print "$table.$col_name: " . Data::Dumper->Dump([$column]) . "\n";
+
+    return $column;
+}
+
 sub get_rename_column_ddl {
    my ($self, $table, $old_name, $new_name) = @_;
    my $def = $self->get_type_ddl($self->get_column($table, $old_name));