ruby/benchmark/so_k_nucleotide.yml

prelude: |
  bm_so_fasta = <<'EOS'
  # The Computer Language Shootout
  # http://shootout.alioth.debian.org/
  # Contributed by Sokolov Yura

  $last = 42.0
  def gen_random(max, im=139968, ia=3877, ic=29573)
      (max * ($last = ($last * ia + ic) % im)) / im
  end

  alu =
     "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+
     "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+
     "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+
     "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+
     "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+
     "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+
     "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

  iub = [
      ["a", 0.27],
      ["c", 0.12],
      ["g", 0.12],
      ["t", 0.27],

      ["B", 0.02],
      ["D", 0.02],
      ["H", 0.02],
      ["K", 0.02],
      ["M", 0.02],
      ["N", 0.02],
      ["R", 0.02],
      ["S", 0.02],
      ["V", 0.02],
      ["W", 0.02],
      ["Y", 0.02],
  ]
  homosapiens = [
      ["a", 0.3029549426680],
      ["c", 0.1979883004921],
      ["g", 0.1975473066391],
      ["t", 0.3015094502008],
  ]

  def make_repeat_fasta(id, desc, src, n)
      puts ">#{id} #{desc}"
      v = nil
      width = 60
      l = src.length
      s = src * ((n / l) + 1)
      s.slice!(n, l)
      puts(s.scan(/.{1,#{width}}/).join("\n"))
  end

  def make_random_fasta(id, desc, table, n)
      puts ">#{id} #{desc}"
      rand, v = nil,nil
      width = 60
      chunk = 1 * width
      prob = 0.0
      table.each{|v| v[1]= (prob += v[1])}
      for i in 1..(n/width)
          puts((1..width).collect{
              rand = gen_random(1.0)
              table.find{|v| v[1]>rand}[0]
          }.join)
      end
      if n%width != 0
          puts((1..(n%width)).collect{
              rand = gen_random(1.0)
              table.find{|v| v[1]>rand}[0]
          }.join)
      end
  end


  n = (ARGV[0] or 250_000).to_i

  make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
  make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
  make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)
  EOS
benchmark:
  - name: so_k_nucleotide
    prelude: |
      script = File.join(File.dirname($0), 'bm_so_fasta.rb')
      File.write(script, bm_so_fasta)

      def prepare_fasta_output n
        filebase = File.join(File.dirname($0), 'fasta.output')
        script = File.join(File.dirname($0), 'bm_so_fasta.rb')
        file = "#{filebase}.#{n}"

        unless FileTest.exist?(file)
          STDERR.puts "preparing #{file}"

          open(file, 'w'){|f|
            ARGV[0] = n
            $stdout = f
            load script
            $stdout = STDOUT
          }
        end
      end
      prepare_fasta_output(100_000)
    script: |
      # The Computer Language Shootout
      # http://shootout.alioth.debian.org
      #
      # contributed by jose fco. gonzalez
      # modified by Sokolov Yura

      seq = String.new

      def frecuency( seq,length )
          n, table = seq.length - length + 1, Hash.new(0)
          f, i = nil, nil
          (0 ... length).each do |f|
              (f ... n).step(length) do |i|
                  table[seq[i,length]] += 1
              end
          end
          [n,table]

      end

      def sort_by_freq( seq,length )
          n,table = frecuency( seq,length )
          a, b, v = nil, nil, nil
          table.sort{|a,b| b[1] <=> a[1]}.each do |v|
              puts "%s %.3f" % [v[0].upcase,((v[1]*100).to_f/n)]
          end
          puts
      end

      def find_seq( seq,s )
          n,table = frecuency( seq,s.length )
          puts "#{table[s].to_s}\t#{s.upcase}"
      end

      input = open(File.join(File.dirname($0), 'fasta.output.100000'), 'rb')

      line = input.gets while line !~ /^>THREE/
      line = input.gets

      while (line !~ /^>/) & line do
          seq << line.chomp
          line = input.gets
      end

      [1,2].each {|i| sort_by_freq( seq,i ) }

      %w(ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt).each{|s| find_seq( seq,s) }
    loop_count: 1
benchmark: introduce benchmark_driver.gem Makefile.in: Clone benchmark-driver repository in benchmark/benchmark-driver `make update-benchmark-driver`, like simplecov. win32/Makefile.sub: Roughly do the same thing. .gitignore: Ignore the cloned repository. common.mk: Trigger `make update-benchmark-driver` to run `make benchmark` and adjust arguments for benchmark_driver.gem. benchmark/require.yml: renamed from benchmark/bm_require.rb, benchmark/prepare_require.rb benchmark/require_thread.yml: renamed from benchmark/bm_require_thread.rb, benchmark/prepare_require_thread.rb benchmark/so_count_words.yml: renamed from benchmark/bm_so_count_words.rb, benchmark/prepare_so_count_words.rb, benchmark/wc.input.base benchmark/so_k_nucleotide.yml: renamed from benchmark/bm_so_k_nucleotide.rb, benchmark/prepare_so_k_nucleotide.rb, benchmark/make_fasta_output.rb benchmark/so_reverse_complement.yml: renamed from benchmark/bm_so_reverse_complement.rb, benchmark/prepare_so_reverse_complement.rb, benchmark/make_fasta_output.rb I'm sorry but I made some duplications between benchmark/require.yml and benchmark/require_thread.yml, and between benchmark/so_k_nucleotide.yml and benchmark/so_reverse_complement.yml. If you're not comfortable with it, please combine these YAMLs to share the same prelude. One YAML file can have multiple benchmark definitions sharing prelude. benchmark/driver.rb: Replace its core feature with benchmark_driver.gem. Some old features are gone for now, but I'll add them again later. [Misc #14902] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@63888 b2dd03c8-39d4-4d8f-98ff-823fe69b080e 2018-07-08 17:38:05 +03:00			`prelude: \|`
			`bm_so_fasta = <<'EOS'`
			`# The Computer Language Shootout`
			`# http://shootout.alioth.debian.org/`
			`# Contributed by Sokolov Yura`

			`$last = 42.0`
			`def gen_random(max, im=139968, ia=3877, ic=29573)`
			`(max * ($last = ($last * ia + ic) % im)) / im`
			`end`

			`alu =`
			`"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+`
			`"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+`
			`"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+`
			`"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+`
			`"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+`
			`"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+`
			`"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"`

			`iub = [`
			`["a", 0.27],`
			`["c", 0.12],`
			`["g", 0.12],`
			`["t", 0.27],`

			`["B", 0.02],`
			`["D", 0.02],`
			`["H", 0.02],`
			`["K", 0.02],`
			`["M", 0.02],`
			`["N", 0.02],`
			`["R", 0.02],`
			`["S", 0.02],`
			`["V", 0.02],`
			`["W", 0.02],`
			`["Y", 0.02],`
			`]`
			`homosapiens = [`
			`["a", 0.3029549426680],`
			`["c", 0.1979883004921],`
			`["g", 0.1975473066391],`
			`["t", 0.3015094502008],`
			`]`

			`def make_repeat_fasta(id, desc, src, n)`
			`puts ">#{id} #{desc}"`
			`v = nil`
			`width = 60`
			`l = src.length`
			`s = src * ((n / l) + 1)`
			`s.slice!(n, l)`
			`puts(s.scan(/.{1,#{width}}/).join("\n"))`
			`end`

			`def make_random_fasta(id, desc, table, n)`
			`puts ">#{id} #{desc}"`
			`rand, v = nil,nil`
			`width = 60`
			`chunk = 1 * width`
			`prob = 0.0`
			`table.each{\|v\| v[1]= (prob += v[1])}`
			`for i in 1..(n/width)`
			`puts((1..width).collect{`
			`rand = gen_random(1.0)`
			`table.find{\|v\| v[1]>rand}[0]`
			`}.join)`
			`end`
			`if n%width != 0`
			`puts((1..(n%width)).collect{`
			`rand = gen_random(1.0)`
			`table.find{\|v\| v[1]>rand}[0]`
			`}.join)`
			`end`
			`end`


			`n = (ARGV[0] or 250_000).to_i`

			`make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)`
			`make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)`
			`make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)`
			`EOS`
			`benchmark:`
			`- name: so_k_nucleotide`
			`prelude: \|`
			`script = File.join(File.dirname($0), 'bm_so_fasta.rb')`
			`File.write(script, bm_so_fasta)`

			`def prepare_fasta_output n`
			`filebase = File.join(File.dirname($0), 'fasta.output')`
			`script = File.join(File.dirname($0), 'bm_so_fasta.rb')`
			`file = "#{filebase}.#{n}"`

			`unless FileTest.exist?(file)`
			`STDERR.puts "preparing #{file}"`

			`open(file, 'w'){\|f\|`
			`ARGV[0] = n`
			`$stdout = f`
			`load script`
			`$stdout = STDOUT`
			`}`
			`end`
			`end`
			`prepare_fasta_output(100_000)`
			`script: \|`
			`# The Computer Language Shootout`
			`# http://shootout.alioth.debian.org`
			`#`
			`# contributed by jose fco. gonzalez`
			`# modified by Sokolov Yura`

			`seq = String.new`

			`def frecuency( seq,length )`
			`n, table = seq.length - length + 1, Hash.new(0)`
			`f, i = nil, nil`
			`(0 ... length).each do \|f\|`
			`(f ... n).step(length) do \|i\|`
			`table[seq[i,length]] += 1`
			`end`
			`end`
			`[n,table]`

			`end`

			`def sort_by_freq( seq,length )`
			`n,table = frecuency( seq,length )`
			`a, b, v = nil, nil, nil`
			`table.sort{\|a,b\| b[1] <=> a[1]}.each do \|v\|`
			`puts "%s %.3f" % [v[0].upcase,((v[1]*100).to_f/n)]`
			`end`
			`puts`
			`end`

			`def find_seq( seq,s )`
			`n,table = frecuency( seq,s.length )`
			`puts "#{table[s].to_s}\t#{s.upcase}"`
			`end`

			`input = open(File.join(File.dirname($0), 'fasta.output.100000'), 'rb')`

			`line = input.gets while line !~ /^>THREE/`
			`line = input.gets`

			`while (line !~ /^>/) & line do`
			`seq << line.chomp`
			`line = input.gets`
			`end`

			`[1,2].each {\|i\| sort_by_freq( seq,i ) }`

			`%w(ggt ggta ggtatt ggtattttaatt ggtattttaatttatagt).each{\|s\| find_seq( seq,s) }`
			`loop_count: 1`