All but a few tests passing for github sql

This commit is contained in:
John Nunemaker 2017-03-13 16:06:29 -04:00
Родитель c29d04a9d4
Коммит fb28516dd1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: D80B5604D6DC62E0
4 изменённых файлов: 659 добавлений и 0 удалений

Просмотреть файл

@ -33,4 +33,9 @@ Gem::Specification.new do |spec|
spec.add_development_dependency "bundler", "~> 1.14"
spec.add_development_dependency "rake", "~> 10.0"
spec.add_development_dependency "minitest", "~> 5.0"
spec.add_development_dependency "timecop", "~> 0.8.1"
spec.add_development_dependency "activerecord", "~> 5.0"
spec.add_development_dependency "activesupport"
spec.add_development_dependency "activerecord-mysql2-adapter"
spec.add_development_dependency "mysql2", "~> 0.3.10"
end

Просмотреть файл

@ -1,5 +1,6 @@
require "github/data/version"
require "github/data/result"
require "github/data/sql"
module Github
module Data

448
lib/github/data/sql.rb Normal file
Просмотреть файл

@ -0,0 +1,448 @@
require "active_record"
require "active_support/all"
module GitHub
module Data
# Public: Build and execute a SQL query, returning results as Arrays. This
# class uses ActiveRecord's connection classes, but provides a better API for
# bind values and raw data access.
#
# Example:
#
# sql = GitHub::Data::SQL.new(<<-SQL, :parent_ids => parent_ids, :network_id => network_id)
# SELECT * FROM repositories
# WHERE source_id = :network_id AND parent_id IN :parent_ids
# SQL
# sql.results
# => returns an Array of Arrays, one for each row
# sql.hash_results
# => returns an Array of Hashes instead
#
# Things to be aware of:
#
# * `nil` is always considered an error and not a usable value. If you need a
# SQL NULL, use the NULL constant instead.
#
# * Identical column names in SELECTs will be overridden:
# `SELECT t1.id, t2.id FROM...` will only return one value for `id`. To get
# more than one column of the same name, use aliases:
# `SELECT t1.id t1_id, t2.id t2_id FROM ...`
#
# * Arrays are escaped as `(item, item, item)`. If you need to insert multiple
# rows (Arrays of Arrays), you must specify the bind value using
# GitHub::Data::SQL::ROWS(array_of_arrays).
#
class SQL
# Internal: a SQL literal value.
class Literal
# Public: the string value of this literal
attr_reader :value
def initialize(value)
@value = value.to_s.dup.freeze
end
def inspect
"<#{self.class.name} #{value}>"
end
end
# Internal: a list of arrays of values for insertion into SQL.
class Rows
# Public: the Array of row values
attr_reader :values
def initialize(values)
unless values.all? { |v| v.is_a? Array }
raise ArgumentError, "cannot instantiate SQL rows with anything but arrays"
end
@values = values.dup.freeze
end
def inspect
"<#{self.class.name} #{values.inspect}>"
end
end
# Public: Instantiate a literal SQL value.
#
# WARNING: The given value is LITERALLY inserted into your SQL without being
# escaped, so use this with extreme caution.
def self.LITERAL(string) # rubocop:disable Style/MethodName
Literal.new(string)
end
# Public: Escape a binary SQL value
#
# Used when a column contains binary data which needs to be escaped
# to prevent warnings from MySQL
def self.BINARY(string) # rubocop:disable Style/MethodName
GitHub::Data::SQL.LITERAL(GitHub::Data::SQL.BINARY_LITERAL(string))
end
# Public: Escape a binary SQL value, yielding a string which can be used as
# a literal in SQL
#
# Performs the core escaping logic for binary strings in MySQL
def self.BINARY_LITERAL(string) # rubocop:disable Style/MethodName
"x'#{string.unpack("H*")[0]}'"
end
# Public: Instantiate a list of Arrays of SQL values for insertion.
def self.ROWS(rows) # rubocop:disable Style/MethodName
Rows.new(rows)
end
# Public: prepackaged literal values.
NULL = Literal.new "NULL"
NOW = Literal.new "NOW()"
# Public: A superclass for errors.
class Error < RuntimeError
end
# Public: Raised when a bound ":keyword" value isn't available.
class BadBind < Error
def initialize(keyword)
super "There's no bind value for #{keyword.inspect}"
end
end
# Public: Raised when a bound value can't be sanitized.
class BadValue < Error
def initialize(value, description = nil)
description ||= "a #{value.class.name}"
super "Can't sanitize #{description}: #{value.inspect}"
end
end
# Internal: A Symbol-Keyed Hash of bind values.
attr_reader :binds
# Public: The SQL String to be executed. Modified in place.
attr_reader :query
# Public: Initialize a new instance.
#
# query - An initial SQL string (default: "").
# binds - A Hash of bind values keyed by Symbol (default: {}). There are
# a couple exceptions. If they clash with a bind value, add them
# in a later #bind or #add call.
#
# :connection - An ActiveRecord Connection adapter.
# :force_timezone - A Symbol describing the ActiveRecord default
# timezone. Either :utc or :local.
#
def initialize(query = nil, binds = nil)
if query.is_a? Hash
binds = query
query = nil
end
@binds = binds ? binds.dup : {}
@query = ""
@connection = @binds.delete :connection
@force_tz = @binds.delete :force_timezone
add query if !query.nil?
end
# Public: Add a chunk of SQL to the query. Any ":keyword" tokens in the SQL
# will be replaced with database-safe values from the current binds.
#
# sql - A String containing a fragment of SQL.
# extras - A Hash of bind values keyed by Symbol (default: {}). These bind
# values are only be used to interpolate this SQL fragment,and
# aren't available to subsequent adds.
#
# Returns self.
# Raises GitHub::Data::SQL::BadBind for unknown keyword tokens.
def add(sql, extras = nil)
return self if sql.blank?
query << " " unless query.empty?
if @force_tz
zone = ActiveRecord::Base.default_timezone
ActiveRecord::Base.default_timezone = @force_tz
end
query << interpolate(sql.strip, extras)
self
ensure
ActiveRecord::Base.default_timezone = zone if @force_tz
end
# Public: Add a chunk of SQL to the query, unless query generated so far is empty.
#
# Example: use this for conditionally adding UNION when generating sets of SELECTs.
#
# sql - A String containing a fragment of SQL.
# extras - A Hash of bind values keyed by Symbol (default: {}). These bind
# values are only be used to interpolate this SQL fragment,and
# aren't available to subsequent adds.
#
# Returns self.
# Raises GitHub::Data::SQL::BadBind for unknown keyword tokens.
def add_unless_empty(sql, extras = nil)
return self if query.empty?
add sql, extras
end
# Public: The number of affected rows for this connection.
def affected_rows
@affected_rows || connection.raw_connection.affected_rows
end
# Public: Add additional bind values to be interpolated each time SQL
# is added to the query.
#
# hash - A Symbol-keyed Hash of new values.
#
# Returns self.
def bind(binds)
self.binds.merge! binds
self
end
# Internal: The object we use to execute SQL and retrieve results. Defaults
# to AR::B.connection, but can be overridden with a ":connection" key when
# initializing a new instance.
def connection
@connection || ActiveRecord::Base.connection
end
# Public: the number of rows found by the query.
#
# Returns FOUND_ROWS() if a SELECT query included SQL_CALC_FOUND_ROWS.
# Raises if SQL_CALC_FOUND_ROWS was not present in the query.
def found_rows
raise "no SQL_CALC_FOUND_ROWS clause present" unless defined? @found_rows
@found_rows
end
# Internal: Replace ":keywords" with sanitized values from binds or extras.
def interpolate(sql, extras = nil)
sql.gsub(/:[a-z][a-z0-9_]*/) do |raw|
sym = raw[1..-1].intern # O.o gensym
if extras && extras.include?(sym)
val = extras[sym]
elsif binds.include?(sym)
val = binds[sym]
end
raise BadBind.new raw if val.nil?
sanitize val
end
end
# Public: The last inserted ID for this connection.
def last_insert_id
connection.raw_connection.last_insert_id
end
# Public: Map each row to an instance of an ActiveRecord::Base subclass.
def models(klass)
return @models if defined? @models
return [] if frozen?
# Use select_all to retrieve hashes for each row instead of arrays of values.
@models = connection.
select_all(query, "#{klass.name} Load via #{self.class.name}").
collect! { |record| klass.send :instantiate, record }
retrieve_found_row_count
freeze
@models
end
# Public: Execute, memoize, and return the results of this query.
def results
return @results if defined? @results
return [] if frozen?
if @force_tz
zone = ActiveRecord::Base.default_timezone
ActiveRecord::Base.default_timezone = @force_tz
end
case query
when /\ADELETE/i
@affected_rows = connection.delete(query, "#{self.class.name} Delete")
when /\AINSERT/i
@last_insert_id = connection.insert(query, "#{self.class.name} Insert")
when /\AUPDATE/i
@affected_rows = connection.update(query, "#{self.class.name} Update")
when /\ASELECT/i
# Why not execute or select_rows? Because select_all hits the query cache.
@hash_results = connection.select_all(query, "#{self.class.name} Select")
@results = @hash_results.map(&:values)
else
@results = connection.execute(query, "#{self.class.name} Execute").to_a
end
@results ||= []
retrieve_found_row_count
freeze
@results
ensure
ActiveRecord::Base.default_timezone = zone if @force_tz
end
# Public: If the query is a SELECT, return an array of hashes instead of an array of arrays.
def hash_results
results
@hash_results || @results
end
# Public: Get first row of results.
def row
results.first
end
# Public: Execute, ignoring results. This is useful when the results of a
# query aren't important, often INSERTs, UPDATEs, or DELETEs.
#
# sql - An optional SQL string. See GitHub::Data::SQL#add for details.
# extras - Optional bind values. See GitHub::Data::SQL#add for details.
#
# Returns self.
def run(sql = nil, extras = nil)
add sql, extras if !sql.nil?
results
self
end
# Internal: when a SQL_CALC_FOUND_ROWS clause is present in a SELECT query,
# retrieve the FOUND_ROWS() value to get a count of the rows sans any
# LIMIT/OFFSET clause.
def retrieve_found_row_count
if query =~ /\A\s*SELECT\s+SQL_CALC_FOUND_ROWS\s+/i
@found_rows = connection.select_value "SELECT FOUND_ROWS()", self.class.name
end
end
# Public: Create and execute a new SQL query, ignoring results.
#
# sql - A SQL string. See GitHub::Data::SQL#add for details.
# bindings - Optional bind values. See GitHub::Data::SQL#add for details.
#
# Returns self.
def self.run(sql, bindings = {})
new(sql, bindings).run
end
# Public: Create and execute a new SQL query, returning its hash_result rows.
#
# sql - A SQL string. See GitHub::Data::SQL#add for details.
# bindings - Optional bind values. See GitHub::Data::SQL#add for details.
#
# Returns an Array of result hashes.
def self.hash_results(sql, bindings = {})
new(sql, bindings).hash_results
end
# Public: Create and execute a new SQL query, returning its result rows.
#
# sql - A SQL string. See GitHub::Data::SQL#add for details.
# bindings - Optional bind values. See GitHub::Data::SQL#add for details.
#
# Returns an Array of result arrays.
def self.results(sql, bindings = {})
new(sql, bindings).results
end
# Public: Create and execute a new SQL query, returning the value of the
# first column of the first result row.
#
# sql - A SQL string. See GitHub::Data::SQL#add for details.
# bindings - Optional bind values. See GitHub::Data::SQL#add for details.
#
# Returns a value or nil.
def self.value(sql, bindings = {})
new(sql, bindings).value
end
# Public: Create and execute a new SQL query, returning its values.
#
# sql - A SQL string. See GitHub::Data::SQL#add for details.
# bindings - Optional bind values. See GitHub::Data::SQL#add for details.
#
# Returns an Array of values.
def self.values(sql, bindings = {})
new(sql, bindings).values
end
# Internal: Make `value` database-safe. Ish.
def sanitize(value)
case value
when Integer
value.to_s
when Numeric, String
connection.quote value
when Array
raise BadValue.new(value, "an empty array") if value.empty?
raise BadValue.new(value, "a nested array") if value.any? { |v| v.is_a? Array }
"(" + value.map { |v| sanitize v }.join(", ") + ")"
when Literal
value.value
when Rows # rows for insertion
value.values.map { |v| sanitize v }.join(", ")
when Class
connection.quote value.name
when DateTime, Time, Date
connection.quote value.to_s(:db)
when true
connection.quoted_true
when false
connection.quoted_false
when Symbol
connection.quote value.to_s
else
raise BadValue, value
end
end
# Public: Get the first column of the first row of results.
def value
row && row.first
end
# Public: Is there a value?
def value?
!value.nil?
end
# Public: Get first column of every row of results.
#
# Returns an Array or nil.
def values
results.map(&:first)
end
end
end
end

Просмотреть файл

@ -0,0 +1,205 @@
require "test_helper"
require "timecop"
ActiveRecord::Migration.verbose = false
ActiveRecord::Base.establish_connection({
adapter: "mysql2",
database: "github_data_test",
})
class GitHub::Data::SQLTest < Minitest::Test
def self.test(name, &block)
define_method("test_#{name.gsub(/\W/, '_')}", &block)
end
local_time = Time.new(1970, 1, 1, 0, 0, 0)
Timecop.freeze(local_time) do
foo = GitHub::Data::SQL::LITERAL "foo"
rows = GitHub::Data::SQL::ROWS [[1, 2], [3, 4]]
SANITIZE_TESTS = [
[GitHub::Data::SQL, "'GitHub::Data::SQL'"],
[DateTime.now.utc, "'1970-01-01 00:00:00'"],
[Time.now.utc, "'1970-01-01 00:00:00'"],
[Time.now.utc.to_date, "'1970-01-01'"],
[true, "1"],
[false, "0"],
[17, "17"],
[1.7, "1.7"],
["corge", "'corge'"],
[:frumple, "'frumple'"],
[foo, "foo"],
[[1, 2], "(1, 2)"],
[rows, "(1, 2), (3, 4)"], # bulk inserts
]
BAD_VALUE_TESTS = [
Hash.new,
nil,
[],
[1, 2, nil],
[[1], [nil]],
[[1, 2], [3, 4]]
]
end
SANITIZE_TESTS.each do |input, expected|
test "#{input.inspect} sanitizes as #{expected.inspect}" do
assert_equal expected, GitHub::Data::SQL.new.sanitize(input)
end
end
BAD_VALUE_TESTS.each do |input|
test "#{input.inspect} (#{input.class}) raises BadValue when sanitized" do
assert_raises GitHub::Data::SQL::BadValue do
GitHub::Data::SQL.new.sanitize input
end
end
end
test "initialize with SQL string" do
str = "query"
sql = GitHub::Data::SQL.new str
assert_equal Hash.new, sql.binds
assert_equal str, sql.query
refute_same sql.query, str
end
test "initialize with binds" do
binds = { :key => "value" }
sql = GitHub::Data::SQL.new binds
assert_equal "", sql.query
assert_equal "value", sql.binds[:key]
refute_same sql.binds, binds
end
test "initialize with query and binds" do
sql = GitHub::Data::SQL.new "query :key", :key => "value"
assert_equal "query 'value'", sql.query
assert_equal "value", sql.binds[:key]
end
test "initialize with single-character binds" do
sql = GitHub::Data::SQL.new "query :x", :x => "y"
assert_equal "query 'y'", sql.query
assert_equal "y", sql.binds[:x]
end
test "add SQL" do
sql = GitHub::Data::SQL.new
sql.add("first").add "second"
assert_equal "first second", sql.query
end
test "add SQL with local binds" do
sql = GitHub::Data::SQL.new
sql.add ":local", :local => "value"
assert_equal "'value'", sql.query
assert_raises GitHub::Data::SQL::BadBind do
sql.add ":local" # the previous value doesn't persist
end
end
test "add SQL with leading and trailing whitespace" do
sql = GitHub::Data::SQL.new " query "
assert_equal "query", sql.query
end
test "add sql date without force_timezone" do
sql = GitHub::Data::SQL.new ":now", :now => Time.now.utc
parsed = Time.parse(sql.query[1..-2]) # leave out leading and ending single quotes
now = Time.now
assert_in_delta now.to_i, parsed.to_i, 3, "#{now.inspect} expected, #{parsed.inspect} actual"
end
test "add sql date with force_timezone" do
sql = GitHub::Data::SQL.new ":now", :now => Time.now.utc, :force_timezone => :utc
parsed = Time.parse(sql.query[1..-2]) # leave out leading and ending single quotes
# Time.parse assumes local time
# so this creates a local time using the properties from the current time in UTC
utc = Time.now.utc
utc_now = Time.local(utc.year, utc.month, utc.day, utc.hour, utc.min, utc.sec)
assert_in_delta utc_now.to_i, parsed.to_i, 3, "#{utc_now.inspect} expected, #{parsed.inspect} actual"
end
test "set some bind params" do
sql = GitHub::Data::SQL.new
sql.bind(:first => "firstval").bind(:second => "secondval")
assert_equal "firstval", sql.binds[:first]
assert_equal "secondval", sql.binds[:second]
end
test "provide a custom connection" do
sql = GitHub::Data::SQL.new :connection => "stub"
assert_equal "stub", sql.connection
assert_nil sql.binds[:connection]
end
test "uses the AR query cache when SELECTing" do
first, second = nil
ActiveRecord::Base.cache do
first = GitHub::Data::SQL.new("SELECT RAND()").value
second = GitHub::Data::SQL.new("SELECT RAND()").value
end
assert_in_delta first, second
end
test "add_unless_empty adds to a non-empty query" do
sql = GitHub::Data::SQL.new "non-empty"
sql.add_unless_empty "foo"
assert_includes sql.query, "foo"
end
test "add_unless_empty does not add to an empty query" do
sql = GitHub::Data::SQL.new
sql.add_unless_empty "foo"
refute_includes sql.query, "foo"
end
test "LITERAL returns a Literal" do
assert_kind_of GitHub::Data::SQL::Literal, GitHub::Data::SQL::LITERAL("foo")
end
test "ROWS returns a GitHub::Data::SQL::Rows instance" do
assert_kind_of GitHub::Data::SQL::Rows, GitHub::Data::SQL::ROWS([[1, 2, 3], [4, 5, 6]])
end
test "ROWS raises if non-arrays are provided" do
assert_raises(ArgumentError) do
GitHub::Data::SQL::ROWS([1, 2, 3])
end
end
test "affected_rows returns the affected row count" do
GitHub::Data::SQL.run("CREATE TEMPORARY TABLE affected_rows_test (x INT)")
GitHub::Data::SQL.run("INSERT INTO affected_rows_test VALUES (1), (2), (3), (4)")
sql = GitHub::Data::SQL.new("UPDATE affected_rows_test SET x = x + 1")
sql.run
assert_equal 4, sql.affected_rows
GitHub::Data::SQL.run("DROP TABLE affected_rows_test")
end
test "affected_rows returns the affected row count even when the query generates warnings" do
GitHub::Data::SQL.run("CREATE TEMPORARY TABLE affected_rows_test (x INT)")
GitHub::Data::SQL.run("INSERT INTO affected_rows_test VALUES (1), (2), (3), (4)")
sql = GitHub::Data::SQL.new("UPDATE affected_rows_test SET x = x + 1 WHERE 1 = '1x'")
sql.run
assert_equal 4, sql.affected_rows
GitHub::Data::SQL.run("DROP TABLE affected_rows_test")
end
end