initial import of dat-analysis classes

2013-04-18 15:04:56 -05:00 · 2013-04-18 15:04:56 -05:00 · 3b811e64a3
--- a/lib/dat/analysis.rb
+++ b/lib/dat/analysis.rb
@ -0,0 +1,449 @@
+# Declare Dat::Analysis up front.  The support files required below (for
+# example lib/dat/analysis/library.rb) open their classes with compact names
+# such as `class Analysis::Library` inside `module Dat`, which needs the
+# Analysis constant to exist already.  The full class body follows the
+# requires.
+module Dat
+  class Analysis
+  end
+end
+
+require 'dat/analysis/library'
+require 'dat/analysis/matcher'
+require 'dat/analysis/result'
+require 'dat/analysis/registry'
+require 'dat/analysis/tally'
+
+module Dat
+    # Public: Analyze the findings of an Experiment
+    #
+    # Typically implementors will wish to subclass this to provide their own
+    # implementations of the following methods suited to the environment where
+    # `dat-science` is being used:  `#read`, `#count`, `#cook`.
+    #
+    # Example:
+    #
+    #   class AnalyzeThis < Dat::Analysis
+    #     # Read a result out of our redis stash
+    #     def read
+    #       RedisHandle.rpop "scienceness.#{experiment_name}.results"
+    #     end
+    #
+    #     # Query our redis stash to see how many new results are pending
+    #     def count
+    #       RedisHandle.llen("scienceness.#{experiment_name}.results")
+    #     end
+    #
+    #     # Deserialize a JSON-encoded result from redis
+    #     def cook(raw_result)
+    #       return nil unless raw_result
+    #       JSON.parse raw_result
+    #     end
+    #   end
+    class Analysis
+
+    # Public: Returns the name of the experiment, as given to `#initialize`.
+    attr_reader   :experiment_name
+
+    # Public: Returns the current science mismatch result (cooked and wrapped),
+    # or nil when there is none.
+    attr_reader   :current
+
+    # Public: an alias for #current
+    alias_method  :result, :current
+
+    # Public: Returns a raw ("un-cooked") version of the current science
+    # mismatch result, as obtained from `#read`.
+    attr_reader   :raw
+
+    # Public: Gets/Sets the base path for loading matcher and wrapper classes.
+    #         Note that the base path will be appended with the experiment name
+    #         before searching for wrappers and matchers (see `#experiment_files`).
+    attr_accessor :path
+
+    # Public: Create a new Dat::Analysis object.  Will load any matcher and
+    #         wrapper classes for this experiment if `#path` is non-nil.
+    #
+    # experiment_name - The String naming the experiment to analyze.
+    #
+    # Examples
+    #
+    #   analyzer = Dat::Analysis.new('bcrypt-passwords')
+    #   => #<Dat::Analysis:...>
+    def initialize(experiment_name)
+      @experiment_name = experiment_name
+      @wrappers = []
+
+      load_classes unless path.nil? rescue nil
+    end
+
+    # Public: process a raw science mismatch result to make it usable in analysis.
+    # This is typically overridden by subclasses to do any sort of unmarshalling
+    # or deserialization required.  The default implementation performs no
+    # conversion.
+    #
+    # raw_result - a raw science mismatch result, typically, as returned by `#read`
+    #
+    # Returns a "cooked" science mismatch result; by default, raw_result itself.
+    def cook(raw_result)
+      raw_result
+    end
+
+    # Public: fetch and summarize pending science mismatch results until an
+    # an unrecognized result is found.  Outputs summaries to STDOUT.  May
+    # modify current mismatch result.
+    #
+    # Returns nil.  Leaves current mismatch result set to first unknown result,
+    # if one is found.
+    def analyze
+      track do
+        while true
+          unless more?
+            fetch # clear current result
+            return summarize_unknown_result
+          end
+
+          print "."
+          fetch
+          break if unknown?
+          count_as_seen identify
+        end
+
+        print "\n"
+        summarize_unknown_result
+      end
+    end
+
+    # Public: skip pending mismatch results not satisfying the provided block.
+    # May modify current mismatch result.
+    #
+    # &block - block accepting a prepared mismatch result and returning true
+    #          or false.
+    #
+    # Examples:
+    #
+    #   jump_to do |result|
+    #     result.user.staff?
+    #   end
+    #
+    #   jump_to do |result|
+    #     result['group']['id'] > 100 && result['url'] =~ %r{/admin}
+    #   end
+    #
+    #   jump_to do |result|
+    #     result['timestamp'].to_i > 1.hour.ago
+    #   end
+    #
+    # Returns nil if no satisfying results are found.  Current result will be nil.
+    # Returns count of remaining results if a satisfying result found.  Leaves
+    # current result set to first result for which block returns a truthy value.
+    def jump_to(&block)
+      raise ArgumentError, "a block is required" unless block_given?
+
+      while more?
+        fetch
+        return count if yield(current)
+      end
+
+      # clear current result since nothing of interest was found.
+      @current = @identified = nil
+    end
+
+    # Public: Are additional science mismatch results available?
+    #
+    # Relies on `#count`, which this class does not define; subclasses are
+    # expected to supply it.
+    #
+    # Returns true if more results can be fetched (`#count` is not 0).
+    # Returns false if no more results can be fetched.
+    def more?
+      count != 0
+    end
+
+    # Public: retrieve a new science mismatch result, as returned by `#read`.
+    #
+    # Returns nil if no new science mismatch results are available.
+    # Returns a cooked and wrapped science mismatch result if available.
+    # Raises NoMethodError if `#read` is not defined on this class.
+    def fetch
+      @identified = nil
+      @raw = read
+      @current = raw ? prepare(raw) : nil
+    end
+
+    # Public: Return a readable representation of the current science mismatch
+    # result.  This will utilize the `#readable` methods declared on a matcher
+    # which identifies the current result.
+    #
+    # Returns a string containing a readable representation of the current
+    # science mismatch result.
+    # Returns nil if there is no current result.
+    def summary
+      return nil unless current
+      recognizer = identify
+      return readable unless recognizer && recognizer.respond_to?(:readable)
+      recognizer.readable
+    end
+
+    # Public: Print a readable summary for the current science mismatch result
+    # (the value of `#summary`) to STDOUT.
+    #
+    # Returns nil (the return value of `puts`).
+    def summarize
+      puts summary
+    end
+
+    # Public: Is the current science mismatch result unidentifiable?
+    #
+    # Returns nil if current result is nil.
+    # Returns true if no matcher can identify current result.
+    # Returns false if a single matcher can identify the current result.
+    # Raises RuntimeError if multiple matchers can identify the current result.
+    def unknown?
+      return nil if current.nil?
+      !identify
+    end
+
+    # Public: Find a matcher which can identify the current science mismatch result.
+    #
+    # Returns nil if current result is nil.
+    # Returns matcher class if a single matcher can identify current result.
+    # Returns false if no matcher can identify the current result.
+    # Raises RuntimeError if multiple matchers can identify the current result.
+    def identify
+      return @identified if @identified
+
+      results = registry.identify(current)
+      if results.size > 1
+        report_multiple_matchers(results)
+      end
+
+      @identified = results.first
+    end
+
+    # Internal: Output failure message about duplicate matchers for a science
+    #           mismatch result.
+    #
+    # dupes - Array of Dat::Analysis::Matcher instances, initialized with a result
+    #
+    # Raises RuntimeError.
+    def report_multiple_matchers(dupes)
+      puts "\n\nMultiple matchers identified result:"
+      puts
+
+      dupes.each_with_index do |matcher, i|
+        print " #{i+1}. "
+        if matcher.respond_to?(:readable)
+          puts matcher.readable
+        else
+          puts readable
+        end
+      end
+
+      puts
+      raise "Result cannot be uniquely identified."
+    end
+
+    # Internal: cook and wrap a raw science mismatch result.
+    #
+    # raw_result - an unmodified result, typically, as returned by `#read`
+    #
+    # Returns the science mismatch result processed by `#cook` and then by `#wrap`.
+    def prepare(raw_result)
+      wrap(cook(raw_result))
+    end
+
+    # Internal: wrap a "cooked" science mismatch result with any known wrapper methods
+    #
+    # cooked_result - a "cooked" mismatch result, as returned by `#cook`
+    #
+    # Returns the cooked science mismatch result, which will now respond to any
+    # instance methods found on our known wrapper classes
+    def wrap(cooked_result)
+      if !wrappers.empty?
+        cooked_result.send(:instance_variable_set, '@analyzer', self)
+
+        class << cooked_result
+          define_method(:method_missing) do |meth, *args|
+            found = nil
+            @analyzer.wrappers.each do |wrapper|
+              next unless wrapper.public_instance_methods.detect {|m| m.to_s == meth.to_s }
+              found = wrapper.new(self).send(meth, *args)
+              break
+            end
+            found
+          end
+        end
+      end
+
+      cooked_result
+    end
+
+    # Internal:  Return the *default* readable representation of the current science
+    # mismatch result. This method is typically overridden by subclasses or defined
+    # in matchers which wish to customize the readable representation of a science
+    # mismatch result. This implementation is provided as a default.
+    #
+    # Returns a string containing a readable representation of the current
+    # science mismatch result.
+    def readable
+      synopsis = []
+
+      synopsis << "Experiment %-20s first: %10s @ %s" % [
+        "[#{current['experiment']}]", current['first'], current['timestamp']
+      ]
+      synopsis << "Duration:  control (%6.2f) | candidate (%6.2f)" % [
+        current['control']['duration'], current['candidate']['duration']
+      ]
+
+      synopsis << ""
+
+      if current['control']['exception']
+        synopsis << "Control raised exception:\n\t#{current['control']['exception'].inspect}"
+      else
+        synopsis << "Control value:   [#{current['control']['value']}]"
+      end
+
+      if current['candidate']['exception']
+        synopsis << "Candidate raised exception:\n\t#{current['candidate']['exception'].inspect}"
+      else
+        synopsis << "Candidate value: [#{current['candidate']['value']}]"
+      end
+
+      synopsis << ""
+
+      remaining = current.keys - ['control', 'candidate', 'experiment', 'first', 'timestamp']
+      remaining.sort.each do |key|
+        if current[key].respond_to?(:keys)
+          # do ordered sorting of hash keys
+          subkeys = key_sort(current[key].keys)
+          synopsis << "\t%15s => {" % [ key ]
+          subkeys.each do |subkey|
+            synopsis << "\t%15s       %15s => %-20s" % [ '', subkey, current[key][subkey].inspect ]
+          end
+          synopsis << "\t%15s    }" % [ '' ]
+        else
+          synopsis << "\t%15s => %-20s" % [ key, current[key] ]
+        end
+      end
+
+      synopsis.join "\n"
+    end
+
+    # Internal: names of the keys which `#key_sort` lists first, in this order.
+    #
+    # Returns an Array of Strings.
+    def preferred_fields
+      %w(id name title owner description login username)
+    end
+
+    def key_sort(keys)
+      str_keys = keys.map {|k| k.to_s }
+      (preferred_fields & str_keys) + (str_keys - preferred_fields)
+    end
+
+    # Public: Which matcher classes are known?
+    #
+    # Returns: list of Dat::Analysis::Matcher classes known to this analyzer,
+    # as reported by its registry.
+    def matchers
+      registry.matchers
+    end
+
+    # Public: Which wrapper classes are known?
+    #
+    # Returns: list of Dat::Analysis::Result classes known to this analyzer,
+    # as reported by its registry.
+    def wrappers
+      registry.wrappers
+    end
+
+    # Public: Add a matcher or wrapper class to this analyzer.  Registration is
+    # dispatched through the class itself: `klass.add_to_analyzer(self)` is
+    # called, and the class decides whether it is a matcher or a wrapper.
+    #
+    # klass - a subclass of either Dat::Analysis::Matcher or Dat::Analysis::Result
+    #         to be registered with this analyzer.
+    #
+    # Returns the value of `klass.add_to_analyzer`; for the Matcher and Result
+    # base classes that is the list of known matchers and wrappers for this
+    # analyzer.
+    def add(klass)
+      klass.add_to_analyzer(self)
+    end
+
+    # Public: Load matcher and wrapper classes from the library for our experiment.
+    #
+    # Returns: a list of loaded matcher and wrapper classes.
+    def load_classes
+      new_classes = library.select_classes do
+        experiment_files.each { |file| load file }
+      end
+
+      new_classes.map {|klass| add klass }
+    end
+
+    # Internal:  Print to STDOUT a readable summary of the current (unknown) science
+    # mismatch result, as well a summary of the tally of identified science mismatch
+    # results analyzed to this point.
+    #
+    # Returns nil if there are no pending science mismatch results.
+    # Returns the number of pending science mismatch results.
+    def summarize_unknown_result
+      tally.summarize
+      if current
+        puts "\nFirst unidentifiable result:\n\n"
+        summarize
+      else
+        puts "\nNo unidentifiable results found. \\m/\n"
+      end
+
+      more? ? count : nil
+    end
+
+    # Internal: keep a tally of analyzed science mismatch results.  Starts a
+    # fresh Tally, discarding any previous counts, before running the block.
+    #
+    # &block: block which will presumably call `#count_as_seen` to update
+    #         tallies of identified science mismatch results.
+    #
+    # Returns: value returned by &block.
+    def track(&block)
+      @tally = Tally.new # reset counts for this run
+      yield
+    end
+
+    # Internal: Increment count for an object in an ongoing tally.
+    #
+    # obj - an Object for which we are recording occurrence counts
+    #
+    # Returns updated tally count for obj.
+    def count_as_seen(obj)
+      tally.count(obj.class.name || obj.class.inspect)
+    end
+
+    # Internal: The current Tally instance.  Cached between calls to `#track`.
+    #
+    # Returns the current Tally instance object.
+    def tally
+      @tally ||= Tally.new
+    end
+
+    # Internal: handle to the library, used for collecting newly discovered
+    # matcher and wrapper classes (see `#load_classes`).
+    #
+    # Returns: the Dat::Analysis::Library class itself, not an instance.
+    def library
+      Dat::Analysis::Library
+    end
+
+    # Internal: registry of wrapper and matcher classes known to this analyzer.
+    #
+    # Returns a (cached between calls) handle to our registry instance.
+    def registry
+      @registry ||= Dat::Analysis::Registry.new
+    end
+
+    # Internal: which class files are candidates for loading matchers and wrappers
+    # for this experiment?
+    #
+    # Returns: sorted Array of paths to ruby files which may contain declarations
+    # of matcher and wrapper classes for this experiment.
+    def experiment_files
+      Dir[File.join(path, experiment_name, '*.rb')].sort
+    end
+
+    # Internal:  Add a matcher class to this analyzer's registry, announcing
+    # the class on STDOUT.
+    # (Intended to be called only by Dat::Analysis::Matcher and subclasses)
+    #
+    # matcher_class - the matcher class to register.
+    #
+    # Returns the value of the registry's `#add`.
+    def add_matcher(matcher_class)
+      puts "Loading matcher class [#{matcher_class}]"
+      registry.add matcher_class
+    end
+
+    # Internal:  Add a wrapper class to this analyzer's registry, announcing
+    # the class on STDOUT.
+    # (Intended to be called only by Dat::Analysis::Result and its subclasses)
+    #
+    # wrapper_class - the results wrapper class to register.
+    #
+    # Returns the value of the registry's `#add`.
+    def add_wrapper(wrapper_class)
+      puts "Loading results wrapper class [#{wrapper_class}]"
+      registry.add wrapper_class
+    end
+  end
+end
--- a/lib/dat/analysis/library.rb
+++ b/lib/dat/analysis/library.rb
@ -0,0 +1,30 @@
+module Dat
+  # Internal: Keep a registry of Dat::Analysis::Matcher and
+  # Dat::Analysis::Result subclasses for use by an Dat::Analysis::Analysis
+  # instance.
+  class Analysis::Library
+
+    @@known_classes = []
+
+    # Public: Collect matcher and results classes created by the
+    #         provided block.
+    #
+    # &block - Block which instantiates matcher and results classes.
+    #
+    # Returns the newly-instantiated matcher and results classes.
+    def self.select_classes(&block)
+      @@known_classes = [] # prepare for registering new classes
+      yield
+      @@known_classes # return all the newly-registered classes
+    end
+
+    # Public: register a matcher or results class.
+    #
+    # klass - a Dat::Analysis::Matcher or Dat::Analysis::Result subclass.
+    #
+    # Returns the current list of registered classes.
+    def self.add(klass)
+      @@known_classes << klass
+    end
+  end
+end
--- a/lib/dat/analysis/matcher.rb
+++ b/lib/dat/analysis/matcher.rb
@ -0,0 +1,43 @@
+module Dat
+  # Public: Base class for science mismatch results matchers.  Subclasses
+  #         implement the `#match?` instance method, which returns true when
+  #         a provided science mismatch result is recognized by the matcher.
+  #
+  # Subclasses are expected to define `#match?`.
+  #
+  # Subclasses may optionally define `#readable` to return an alternative
+  # readable String representation of a cooked science mismatch result.  The
+  # default implementation is defined in Dat::Analysis#readable.
+  class Analysis::Matcher
+
+    # Public: The science mismatch result to be matched.
+    attr_reader :result
+
+    # Internal: Called at subclass instantiation time to register the subclass
+    #           with Dat::Analysis::Library.
+    #
+    # subclass - The Dat::Analysis::Matcher subclass being instantiated.
+    #
+    # Not intended to be called directly.
+    def self.inherited(subclass)
+      Dat::Analysis::Library.add subclass
+    end
+
+    # Internal: Add this class to a Dat::Analysis instance.  Intended to be
+    #           called from Dat::Analysis to dispatch registration.
+    #
+    # analyzer - a Dat::Analysis instance for an experiment
+    #
+    # Returns the analyzer's updated list of known matcher classes.
+    def self.add_to_analyzer(analyzer)
+      analyzer.add_matcher self
+    end
+
+    # Public: create a new Matcher.
+    #
+    # result - a science mismatch result, to be tested via `#match?`
+    def initialize(result)
+      @result = result
+    end
+  end
+end
--- a/lib/dat/analysis/registry.rb
+++ b/lib/dat/analysis/registry.rb
@ -0,0 +1,50 @@
+module Dat
+  # Internal: Registry of Dat::Analysis::Matcher and Dat::Analysis::Result
+  #           classes.  This is used to maintain the mapping of matchers and
+  #           results wrappers for a particular Dat::Analysis instance.
+  class Analysis::Registry
+
+    # Public: Create a new Registry instance.
+    def initialize
+      @known_classes = []
+    end
+
+    # Public: Add a matcher or results wrapper class to the registry
+    #
+    # klass - a Dat::Analysis::Matcher subclass or a Dat::Analysis::Result
+    #         subclass, to be added to the registry.
+    #
+    # Returns the list of currently registered classes.
+    def add(klass)
+      @known_classes << klass
+    end
+
+    # Public: Get the list of known Dat::Analysis::Matcher subclasses
+    #
+    # Returns the list of currently known matcher classes.
+    def matchers
+      @known_classes.select {|c| c <= ::Dat::Analysis::Matcher }
+    end
+
+    # Public: Get the list of known Dat::Analysis::Result subclasses
+    #
+    # Returns the list of currently known result wrapper classes.
+    def wrappers
+      @known_classes.select {|c| c <= ::Dat::Analysis::Result }
+    end
+
+    # Public: Get list of Dat::Analysis::Matcher subclasses for which
+    #         `#match?` is truthy for the given result.
+    #
+    # result - a cooked science mismatch result
+    #
+    # Returns a list of matchers initialized with the provided result.
+    def identify(result)
+      matchers.inject([]) do |hits, matcher|
+        instance = matcher.new(result)
+        hits << instance if instance.match?
+        hits
+      end
+    end
+  end
+end
--- a/lib/dat/analysis/result.rb
+++ b/lib/dat/analysis/result.rb
@ -0,0 +1,39 @@
+module Dat
+  # Public: Base class for wrappers around science mismatch results.
+  #
+  # Instance methods defined on subclasses will be added as instance methods
+  # on science mismatch results handled by Dat::Analysis instances which
+  # add the wrapper subclass via Dat::Analysis#add or Dat::Analysis#load_classes.
+  class Analysis::Result
+
+    # Public: return the current science mismatch result
+    attr_reader :result
+
+    # Internal: Called at subclass instantiation time to register the subclass
+    #           with Dat::Analysis::Library.
+    #
+    # subclass - The Dat::Analysis::Result subclass being instantiated.
+    #
+    # Not intended to be called directly.
+    def self.inherited(subclass)
+      Dat::Analysis::Library.add subclass
+    end
+
+    # Internal: Add this class to a Dat::Analysis instance.  Intended to be
+    #           called from Dat::Analysis to dispatch registration.
+    #
+    # analyzer - a Dat::Analysis instance for an experiment
+    #
+    # Returns the analyzer's updated list of known result wrapper classes.
+    def self.add_to_analyzer(analyzer)
+      analyzer.add_wrapper self
+    end
+
+    # Public: create a new Result wrapper.
+    #
+    # result - a science mismatch result, to be wrapped with our instance methods.
+    def initialize(result)
+      @result = result
+    end
+  end
+end
--- a/lib/dat/analysis/tally.rb
+++ b/lib/dat/analysis/tally.rb
@ -0,0 +1,59 @@
+module Dat
+  # Internal: Track and summarize counts of occurrences of mismatch objects.
+  #
+  # Examples
+  #
+  #   tally = Dat::Analysis::Tally.new
+  #   tally.count('foo')
+  #   => 1
+  #   tally.count('bar')
+  #   => 1
+  #   tally.count('foo')
+  #   => 2
+  #   puts tally.summary
+  #   Summary of known mismatches found:
+  #   foo	2
+  #   bar	1
+  #   TOTAL:	3
+  #   => nil
+  #
+  class Analysis::Tally
+
+    # Public: Returns the hash of recorded mismatches.
+    attr_reader :tally
+
+    def initialize
+      @tally = {}
+    end
+
+    # Public: record an occurrence of a mismatch class.
+    def count(klass)
+      tally[klass] ||= 0
+      tally[klass] += 1
+    end
+
+    # Public: Return a String summary of mismatches seen so far.
+    #
+    # Returns a printable String summarizing the counts of mismatches seen,
+    # sorted in descending count order.
+    def summary
+      return "\nNo results identified.\n" if tally.keys.empty?
+      result = [ "\nSummary of identified results:\n" ]
+      sum = 0
+      tally.keys.sort_by {|k| -1*tally[k] }.each do |k|
+        sum += tally[k]
+        result << "%30s: %6d" % [k, tally[k]]
+      end
+      result << "%30s: %6d" % ['TOTAL', sum]
+      result.join "\n"
+    end
+
+    # Public: prints a summary of mismatches seen so far to STDOUT (see
+    # `#summary` above).
+    #
+    # Returns nil.
+    def summarize
+      puts summary
+    end
+  end
+end