# --- lib/dat/analysis.rb ---------------------------------------------------

module Dat
  # Forward declaration so the component classes further down can be opened
  # as `class Analysis::Library`, `class Analysis::Matcher`, etc.
  class Analysis
  end
end

# The components originally pulled in with `require 'dat/analysis/library'`,
# `.../matcher`, `.../result`, `.../registry` and `.../tally` are defined
# inline further down in this unit.

module Dat
  # Public: Analyze the findings of an Experiment
  #
  # Typically implementors will wish to subclass this to provide their own
  # implementations of the following methods suited to the environment where
  # `dat-science` is being used: `#read`, `#count`, `#cook`.
  #
  # Example:
  #
  #   class AnalyzeThis < Dat::Analysis
  #     # Read a result out of our redis stash
  #     def read
  #       RedisHandle.rpop "scienceness.#{experiment_name}.results"
  #     end
  #
  #     # Query our redis stash to see how many new results are pending
  #     def count
  #       RedisHandle.llen("scienceness.#{experiment_name}.results")
  #     end
  #
  #     # Deserialize a JSON-encoded result from redis
  #     def cook(raw_result)
  #       return nil unless raw_result
  #       JSON.parse raw_result
  #     end
  #   end
  class Analysis

    # Public: Returns the name of the experiment
    attr_reader :experiment_name

    # Public: Returns the current science mismatch result
    attr_reader :current

    # Public: an alias for #current
    alias_method :result, :current

    # Public: Returns a raw ("un-cooked") version of the current science
    # mismatch result
    attr_reader :raw

    # Public: Gets/Sets the base path for loading matcher and wrapper classes.
    # Note that the base path will be appended with the experiment name
    # before searching for wrappers and matchers.
    attr_accessor :path

    # Public: Create a new Dat::Analysis object. Will load any matcher and
    # wrapper classes for this experiment if `#path` is non-nil.
    #
    # Loading is best-effort: a StandardError raised while resolving `#path`
    # or loading classes never aborts construction, but it is reported on
    # STDERR instead of being silently discarded.
    #
    # experiment_name - The String naming the experiment to analyze.
    #
    # Examples
    #
    #   analyzer = Dat::Analysis.new('bcrypt-passwords')
    #   => #<Dat::Analysis:...>
    def initialize(experiment_name)
      @experiment_name = experiment_name

      begin
        load_classes unless path.nil?
      rescue StandardError => e
        warn "Dat::Analysis: unable to load classes for experiment " \
             "[#{experiment_name}]: #{e.class}: #{e.message}"
      end
    end

    # Public: process a raw science mismatch result to make it usable in
    # analysis. This is typically overridden by subclasses to do any sort of
    # unmarshalling or deserialization required.
    #
    # raw_result - a raw science mismatch result, typically, as returned by
    #              `#read`
    #
    # Returns a "cooked" science mismatch result (by default, `raw_result`
    # unchanged).
    def cook(raw_result)
      raw_result
    end

    # Public: fetch and summarize pending science mismatch results until an
    # unrecognized result is found. Outputs summaries to STDOUT. May modify
    # current mismatch result.
    #
    # Returns the number of pending results, or nil when none are pending.
    # Leaves current mismatch result set to first unknown result, if one is
    # found; otherwise the current result is cleared.
    def analyze
      track do
        found_unknown = false

        while more?
          print "."
          fetch
          if unknown?
            found_unknown = true
            break
          end
          count_as_seen identify
        end

        if found_unknown
          print "\n"
        else
          fetch # nothing left to read: this clears the current result
        end

        summarize_unknown_result
      end
    end

    # Public: skip pending mismatch results not satisfying the provided block.
    # May modify current mismatch result.
    #
    # &block - block accepting a prepared mismatch result and returning true
    #          or false.
    #
    # Examples:
    #
    #   jump_to do |result|
    #     result.user.staff?
    #   end
    #
    #   jump_to do |result|
    #     result['group']['id'] > 100 && result['url'] =~ %r{/admin}
    #   end
    #
    #   jump_to do |result|
    #     result['timestamp'].to_i > 1.hour.ago
    #   end
    #
    # Returns nil if no satisfying results are found. Current (and raw) result
    #   will be nil.
    # Returns count of remaining results if a satisfying result found. Leaves
    #   current result set to first result for which block returns a truthy
    #   value.
    # Raises ArgumentError if no block is given.
    def jump_to(&block)
      raise ArgumentError, "a block is required" unless block_given?

      while more?
        fetch
        return count if yield(current)
      end

      # Nothing of interest was found: clear every trace of the last result,
      # including the raw form, so `#raw` and `#current` stay in agreement.
      @raw = @current = @identified = nil
    end

    # Public: Are additional science mismatch results available?
    #
    # Returns true if more results can be fetched.
    # Returns false if no more results can be fetched.
    def more?
      count != 0
    end

    # Public: retrieve a new science mismatch result, as returned by `#read`.
    #
    # Returns nil if no new science mismatch results are available.
    # Returns a cooked and wrapped science mismatch result if available.
    # Raises NoMethodError if `#read` is not defined on this class.
    def fetch
      @identified = nil # any cached matcher belongs to the previous result
      @raw        = read
      @current    = raw ? prepare(raw) : nil
    end

    # Public: Return a readable representation of the current science mismatch
    # result. This will utilize the `#readable` methods declared on a matcher
    # which identifies the current result.
    #
    # Returns a string containing a readable representation of the current
    #   science mismatch result.
    # Returns nil if there is no current result.
    def summary
      return nil unless current

      recognizer = identify
      return readable unless recognizer && recognizer.respond_to?(:readable)
      recognizer.readable
    end

    # Public: Print a readable summary for the current science mismatch result
    # to STDOUT.
    #
    # Returns nil.
    def summarize
      puts summary
    end

    # Public: Is the current science mismatch result unidentifiable?
    #
    # Returns nil if current result is nil.
    # Returns true if no matcher can identify current result.
    # Returns false if a single matcher can identify the current result.
    # Raises RuntimeError if multiple matchers can identify the current result.
    def unknown?
      return nil if current.nil?
      !identify
    end

    # Public: Find a matcher which can identify the current science mismatch
    # result. The matcher found is cached until the next `#fetch`.
    #
    # Returns nil if current result is nil (no matcher is consulted).
    # Returns a matcher instance, initialized with the current result, if a
    #   single matcher can identify it.
    # Returns nil if no matcher can identify the current result.
    # Raises RuntimeError if multiple matchers can identify the current result.
    def identify
      return @identified if @identified
      return nil if current.nil?

      results = registry.identify(current)
      report_multiple_matchers(results) if results.size > 1

      @identified = results.first
    end

    # Internal: Output failure message about duplicate matchers for a science
    # mismatch result.
    #
    # dupes - Array of Dat::Analysis::Matcher instances, initialized with a
    #         result
    #
    # Raises RuntimeError.
    def report_multiple_matchers(dupes)
      puts "\n\nMultiple matchers identified result:"
      puts

      dupes.each_with_index do |matcher, i|
        print " #{i+1}. "
        if matcher.respond_to?(:readable)
          puts matcher.readable
        else
          puts readable
        end
      end

      puts
      raise "Result cannot be uniquely identified."
    end

    # Internal: cook and wrap a raw science mismatch result.
    #
    # raw_result - an unmodified result, typically, as returned by `#read`
    #
    # Returns the science mismatch result processed by `#cook` and then by
    # `#wrap`.
    def prepare(raw_result)
      wrap(cook(raw_result))
    end

    # Internal: wrap a "cooked" science mismatch result with any known wrapper
    # methods. The result object is modified in place: it gains an `@analyzer`
    # instance variable and singleton `method_missing` /
    # `respond_to_missing?` hooks which delegate to the first registered
    # wrapper class that publicly defines the requested method.
    #
    # cooked_result - a "cooked" mismatch result, as returned by `#cook`
    #
    # Returns the cooked science mismatch result, which will now respond to
    # any public instance methods found on our known wrapper classes.
    def wrap(cooked_result)
      return cooked_result if wrappers.empty?

      cooked_result.instance_variable_set(:@analyzer, self)

      class << cooked_result
        # Dispatch unknown messages to the first wrapper that defines them.
        # NOTE: messages no wrapper understands still answer nil (rather than
        # raising NoMethodError) to stay compatible with existing callers.
        def method_missing(meth, *args, &blk)
          wrapper = @analyzer.wrappers.detect { |w| w.public_method_defined?(meth) }
          return nil unless wrapper
          wrapper.new(self).public_send(meth, *args, &blk)
        end

        # Keep `respond_to?` truthful about wrapper-provided methods.
        def respond_to_missing?(meth, include_private = false)
          @analyzer.wrappers.any? { |w| w.public_method_defined?(meth) } || super
        end
      end

      cooked_result
    end

    # Internal: Return the *default* readable representation of the current
    # science mismatch result. This method is typically overridden by
    # subclasses or defined in matchers which wish to customize the readable
    # representation of a science mismatch result. This implementation is
    # provided as a default.
    #
    # The current result is read through String keys: 'experiment', 'first',
    # 'timestamp', and 'control' / 'candidate' (each with 'duration' and
    # either 'exception' or 'value'). All remaining keys are listed after.
    #
    # Returns a string containing a readable representation of the current
    # science mismatch result.
    def readable
      synopsis = []

      synopsis << "Experiment %-20s first: %10s @ %s" % [
        "[#{current['experiment']}]", current['first'], current['timestamp']
      ]
      synopsis << "Duration:  control (%6.2f) | candidate (%6.2f)" % [
        current['control']['duration'], current['candidate']['duration']
      ]

      synopsis << ""

      if current['control']['exception']
        synopsis << "Control raised exception:\n\t#{current['control']['exception'].inspect}"
      else
        synopsis << "Control value:   [#{current['control']['value']}]"
      end

      if current['candidate']['exception']
        synopsis << "Candidate raised exception:\n\t#{current['candidate']['exception'].inspect}"
      else
        synopsis << "Candidate value: [#{current['candidate']['value']}]"
      end

      synopsis << ""

      remaining = current.keys - ['control', 'candidate', 'experiment', 'first', 'timestamp']
      remaining.sort.each do |key|
        if current[key].respond_to?(:keys)
          # do ordered sorting of hash keys
          subkeys = key_sort(current[key].keys)
          synopsis << "\t%15s => {" % [ key ]
          subkeys.each do |subkey|
            synopsis << "\t%15s       %15s => %-20s" % [ '', subkey, current[key][subkey].inspect ]
          end
          synopsis << "\t%15s    }" % [ '' ]
        else
          synopsis << "\t%15s => %-20s" % [ key, current[key] ]
        end
      end

      synopsis.join "\n"
    end

    # Internal: field names which `#key_sort` lists first, in this order.
    #
    # Returns an Array of Strings.
    def preferred_fields
      %w(id name title owner description login username)
    end

    # Internal: order hash keys for display: preferred fields first (in
    # `#preferred_fields` order), then the rest in their original order.
    #
    # keys - Array of keys; each is converted with `#to_s`.
    #
    # Returns an Array of Strings.
    def key_sort(keys)
      str_keys = keys.map {|k| k.to_s }
      (preferred_fields & str_keys) + (str_keys - preferred_fields)
    end

    # Public: Which matcher classes are known?
    #
    # Returns: list of Dat::Analysis::Matcher classes known to this analyzer.
    def matchers
      registry.matchers
    end

    # Public: Which wrapper classes are known?
    #
    # Returns: list of Dat::Analysis::Result classes known to this analyzer.
    def wrappers
      registry.wrappers
    end

    # Public: Add a matcher or wrapper class to this analyzer.
    #
    # klass - a subclass of either Dat::Analysis::Matcher or
    #         Dat::Analysis::Result to be registered with this analyzer.
    #
    # Returns the list of known matchers and wrappers for this analyzer.
    def add(klass)
      klass.add_to_analyzer(self)
    end

    # Public: Load matcher and wrapper classes from the library for our
    # experiment.
    #
    # Returns: a list with one entry per newly loaded class (each entry is the
    # value returned by `#add` for that class).
    def load_classes
      new_classes = library.select_classes do
        experiment_files.each { |file| load file }
      end

      new_classes.map {|klass| add klass }
    end

    # Internal: Print to STDOUT a readable summary of the current (unknown)
    # science mismatch result, as well a summary of the tally of identified
    # science mismatch results analyzed to this point.
    #
    # Returns nil if there are no pending science mismatch results.
    # Returns the number of pending science mismatch results.
    def summarize_unknown_result
      tally.summarize
      if current
        puts "\nFirst unidentifiable result:\n\n"
        summarize
      else
        puts "\nNo unidentifiable results found. \\m/\n"
      end

      more? ? count : nil
    end

    # Internal: keep a tally of analyzed science mismatch results. Starts a
    # fresh Tally before yielding.
    #
    # &block: block which will presumably call `#count_as_seen` to update
    #         tallies of identified science mismatch results.
    #
    # Returns: value returned by &block.
    def track(&block)
      @tally = Tally.new
      yield
    end

    # Internal: Increment count for an object in an ongoing tally. Objects are
    # tallied under their class name (or the class's `#inspect` when the class
    # is anonymous).
    #
    # obj - an Object for which we are recording occurrence counts
    #
    # Returns updated tally count for obj.
    def count_as_seen(obj)
      tally.count(obj.class.name || obj.class.inspect)
    end

    # Internal: The current Tally instance. Cached between calls to `#track`.
    #
    # Returns the current Tally instance object.
    def tally
      @tally ||= Tally.new
    end

    # Internal: handle to the library, used for collecting newly discovered
    # matcher and wrapper classes.
    #
    # Returns: handle to the library class.
    def library
      Dat::Analysis::Library
    end

    # Internal: registry of wrapper and matcher classes known to this analyzer.
    #
    # Returns a (cached between calls) handle to our registry instance.
    def registry
      @registry ||= Dat::Analysis::Registry.new
    end

    # Internal: which class files are candidates for loading matchers and
    # wrappers for this experiment?
    #
    # Returns: sorted Array of paths to ruby files which may contain
    # declarations of matcher and wrapper classes for this experiment.
    def experiment_files
      Dir[File.join(path, experiment_name, '*.rb')].sort
    end

    # Internal: Add a matcher class to this analyzer's registry.
    # (Intended to be called only by Dat::Analysis::Matcher and subclasses)
    def add_matcher(matcher_class)
      puts "Loading matcher class [#{matcher_class}]"
      registry.add matcher_class
    end

    # Internal: Add a wrapper class to this analyzer's registry.
    # (Intended to be called only by Dat::Analysis::Result and its subclasses)
    def add_wrapper(wrapper_class)
      puts "Loading results wrapper class [#{wrapper_class}]"
      registry.add wrapper_class
    end
  end
end

# --- lib/dat/analysis/library.rb -------------------------------------------

module Dat
  # Internal: Keep a registry of Dat::Analysis::Matcher and
  # Dat::Analysis::Result subclasses for use by a Dat::Analysis instance.
  class Analysis::Library

    # Class-level instance variable (not `@@`) holding the classes registered
    # since the most recent `select_classes` call began.
    @known_classes = []

    # Public: Collect matcher and results classes created by the
    # provided block.
    #
    # &block - Block which instantiates matcher and results classes.
    #
    # Returns the newly-instantiated matcher and results classes.
    def self.select_classes(&block)
      @known_classes = [] # prepare for registering new classes
      yield
      @known_classes      # return all the newly-registered classes
    end

    # Public: register a matcher or results class.
    #
    # klass - a Dat::Analysis::Matcher or Dat::Analysis::Result subclass.
    #
    # Returns the current list of registered classes.
    def self.add(klass)
      @known_classes << klass
    end
  end
end

# --- lib/dat/analysis/matcher.rb -------------------------------------------

module Dat
  # Public: Base class for science mismatch results matchers. Subclasses
  # implement the `#match?` instance method, which returns true when
  # a provided science mismatch result is recognized by the matcher.
  #
  # Subclasses are expected to define `#match?`.
  #
  # Subclasses may optionally define `#readable` to return an alternative
  # readable String representation of a cooked science mismatch result. The
  # default implementation is defined in Dat::Analysis#readable.
  class Analysis::Matcher

    # Public: The science mismatch result to be matched.
    attr_reader :result

    # Internal: Called at subclass instantiation time to register the subclass
    # with Dat::Analysis::Library.
    #
    # subclass - The Dat::Analysis::Matcher subclass being instantiated.
    #
    # Not intended to be called directly.
    def self.inherited(subclass)
      Dat::Analysis::Library.add subclass
    end

    # Internal: Add this class to a Dat::Analysis instance. Intended to be
    # called from Dat::Analysis to dispatch registration.
    #
    # analyzer - a Dat::Analysis instance for an experiment
    #
    # Returns whatever `analyzer.add_matcher` returns (for Dat::Analysis, the
    # analyzer's updated list of registered classes).
    def self.add_to_analyzer(analyzer)
      analyzer.add_matcher self
    end

    # Public: create a new Matcher.
    #
    # result - a science mismatch result, to be tested via `#match?`
    def initialize(result)
      @result = result
    end
  end
end

# --- lib/dat/analysis/registry.rb ------------------------------------------

module Dat
  # Internal: Registry of Dat::Analysis::Matcher and Dat::Analysis::Result
  # classes. This is used to maintain the mapping of matchers and
  # results wrappers for a particular Dat::Analysis instance.
  class Analysis::Registry

    # Public: Create a new Registry instance.
    def initialize
      @known_classes = []
    end

    # Public: Add a matcher or results wrapper class to the registry.
    # Registering a class that is already present is a no-op, so a matcher
    # added twice cannot later be reported as two competing matchers.
    #
    # klass - a Dat::Analysis::Matcher subclass or a Dat::Analysis::Result
    #         subclass, to be added to the registry.
    #
    # Returns the list of currently registered classes.
    def add(klass)
      @known_classes << klass unless @known_classes.include?(klass)
      @known_classes
    end

    # Public: Get the list of known Dat::Analysis::Matcher subclasses
    #
    # Returns the list of currently known matcher classes.
    def matchers
      @known_classes.select {|c| c <= ::Dat::Analysis::Matcher }
    end

    # Public: Get the list of known Dat::Analysis::Result subclasses
    #
    # Returns the list of currently known result wrapper classes.
    def wrappers
      @known_classes.select {|c| c <= ::Dat::Analysis::Result }
    end

    # Public: Get list of Dat::Analysis::Matcher subclasses for which
    # `#match?` is truthy for the given result.
    #
    # result - a cooked science mismatch result
    #
    # Returns a list of matchers initialized with the provided result.
    def identify(result)
      matchers.each_with_object([]) do |matcher, hits|
        instance = matcher.new(result)
        hits << instance if instance.match?
      end
    end
  end
end

# --- lib/dat/analysis/result.rb --------------------------------------------

module Dat
  # Public: Base class for wrappers around science mismatch results.
  #
  # Instance methods defined on subclasses will be added as instance methods
  # on science mismatch results handled by Dat::Analysis instances which
  # add the wrapper subclass via Dat::Analysis#add or
  # Dat::Analysis#load_classes.
  class Analysis::Result

    # Public: return the current science mismatch result
    attr_reader :result

    # Internal: Called at subclass instantiation time to register the subclass
    # with Dat::Analysis::Library.
    #
    # subclass - The Dat::Analysis::Result subclass being instantiated.
    #
    # Not intended to be called directly.
    def self.inherited(subclass)
      Dat::Analysis::Library.add subclass
    end

    # Internal: Add this class to a Dat::Analysis instance. Intended to be
    # called from Dat::Analysis to dispatch registration.
    #
    # analyzer - a Dat::Analysis instance for an experiment
    #
    # Returns whatever `analyzer.add_wrapper` returns (for Dat::Analysis, the
    # analyzer's updated list of registered classes).
    def self.add_to_analyzer(analyzer)
      analyzer.add_wrapper self
    end

    # Public: create a new Result wrapper.
    #
    # result - a science mismatch result, to be wrapped with our instance
    #          methods.
    def initialize(result)
      @result = result
    end
  end
end

# --- lib/dat/analysis/tally.rb ---------------------------------------------

module Dat
  # Internal: Track and summarize counts of occurrences of mismatch objects.
  #
  # Examples
  #
  #   tally = Dat::Analysis::Tally.new
  #   tally.count('foo')
  #   => 1
  #   tally.count('bar')
  #   => 1
  #   tally.count('foo')
  #   => 2
  #   puts tally.summary
  #
  #   Summary of identified results:
  #
  #                              foo:      2
  #                              bar:      1
  #                            TOTAL:      3
  #   => nil
  #
  class Analysis::Tally

    # Public: Returns the hash of recorded mismatches (key => count).
    attr_reader :tally

    def initialize
      @tally = {}
    end

    # Public: record an occurrence of a mismatch class.
    #
    # klass - the key (typically a class name String) to count.
    #
    # Returns the updated count for klass.
    def count(klass)
      tally[klass] = tally.fetch(klass, 0) + 1
    end

    # Public: Return a String summary of mismatches seen so far.
    #
    # Returns a printable String summarizing the counts of mismatches seen,
    # sorted in descending count order.
    def summary
      return "\nNo results identified.\n" if tally.keys.empty?

      lines = [ "\nSummary of identified results:\n" ]
      total = 0
      tally.keys.sort_by {|k| -1*tally[k] }.each do |k|
        total += tally[k]
        lines << "%30s: %6d" % [k, tally[k]]
      end
      lines << "%30s: %6d" % ['TOTAL', total]
      lines.join "\n"
    end

    # Public: prints a summary of mismatches seen so far to STDOUT (see
    # `#summary` above).
    #
    # Returns nil.
    def summarize
      puts summary
    end
  end
end