From ffa1b473f0f30f2cea4318f0b20a4cc9d7e4d331 Mon Sep 17 00:00:00 2001
From: halostatue <austin@zieglers.ca>
Date: Sat, 28 Feb 2009 00:13:20 -0500
Subject: [PATCH 1/3] Added a gitignore; put the IANA downloader in the
 Rakefile.

---
 .gitignore |   6 ++
 Rakefile   | 252 +++++++++++++++++++++++++++--------------------------
 2 files changed, 136 insertions(+), 122 deletions(-)
 create mode 100644 .gitignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..072a118
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+*.swp
+html
+doc
+pkg
+publish
+coverage
diff --git a/Rakefile b/Rakefile
index bc89cf9..5e98857 100644
--- a/Rakefile
+++ b/Rakefile
@@ -42,8 +42,7 @@ Hoe.new PKG_NAME, PKG_VERSION do |p|
 
   p.clean_globs     << "coverage"
 
-  p.spec_extras[:extra_rdoc_files] = MANIFEST.grep(/txt$/) -
-    ["Manifest.txt"]
+  p.spec_extras[:extra_rdoc_files] = MANIFEST.grep(/txt$/) - ["Manifest.txt"]
 end
 
 desc "Build a MIME::Types .tar.gz distribution."
@@ -72,7 +71,7 @@ file PKG_TAR => [ :test ] do |t|
   begin
     unless File.directory?(File.dirname(t.name))
       require 'fileutils'
-      File.mkdir_p File.dirname(t.name)
+      FileUtils.mkdir_p File.dirname(t.name)
     end
     tf = File.open(t.name, 'wb')
     gz = Zlib::GzipWriter.new(tf)
@@ -118,138 +117,147 @@ task :build_manifest do |t|
 end
 
 desc "Download the current MIME type registrations from IANA."
-task :download_from_iana do |t|
-#!/usr/bin/ruby -w
+task :iana, :save, :destination do |t, args|
+  save_type = args.save || :text
+  save_type = save_type.to_sym
 
-require 'rubygems'
-require 'open-uri'
-require 'nokogiri'
-require 'cgi'
-
-class IANAParser
-  include Comparable
-
-  INDEX = %q(http://www.iana.org/assignments/media-types/)
-  CONTACT_PEOPLE = %r{http://www.iana.org/assignments/contact-people.html?#(.*)}
-  RFC_EDITOR = %r{http://www.rfc-editor.org/rfc/rfc(\d+).txt}
-  IETF_RFC = %r{http://www.ietf.org/rfc/rfc(\d+).txt}
-  IETF_RFC_TOOLS = %r{http://tools.ietf.org/html/rfc(\d+)}
-
-  class << self
-    def load_index
-      @types ||= {}
-
-      Nokogiri::HTML(open(INDEX) { |f| f.read }).xpath('//p/a').each do |tag|
-        href_match = %r{^/assignments/media-types/(.+)/$}.match(tag['href'])
-        next if href_match.nil?
-        type = href_match.captures[0]
-        @types[tag.content] = IANAParser.new(tag.content, type)
-      end
-    end
-
-    attr_reader :types
+  case save_type
+  when :text, :both, :html
+    nil
+  else
+    raise "Unknown save type provided. Must be one of text, both, or html."
   end
 
-  def initialize(name, type)
-    @name = name
-    @type = type
-    @url  = File.join(INDEX, @type)
-  end
+  destination = args.destination || "type-lists"
 
-  attr_reader :name
-  attr_reader :type
-  attr_reader :url
-  attr_reader :html
+  require 'open-uri'
+  require 'nokogiri'
+  require 'cgi'
 
-  def download(name = nil)
-    if name
-      @html = Nokogiri::HTML(open(name) { |f| f.read })
-    else
-      @html = Nokogiri::HTML(open(@url) { |f| f.read })
-    end
-  end
+  class IANAParser
+    include Comparable
 
-  def save_html
-    File.open("#@name.html", "wb") { |w| w.write @html }
-  end
+    INDEX = %q(http://www.iana.org/assignments/media-types/)
+    CONTACT_PEOPLE = %r{http://www.iana.org/assignments/contact-people.html?#(.*)}
+    RFC_EDITOR = %r{http://www.rfc-editor.org/rfc/rfc(\d+).txt}
+    IETF_RFC = %r{http://www.ietf.org/rfc/rfc(\d+).txt}
+    IETF_RFC_TOOLS = %r{http://tools.ietf.org/html/rfc(\d+)}
 
-  def <=>(o)
-    self.name <=> o.name
-  end
+    class << self
+      def load_index
+        @types ||= {}
 
-  def parse
-    nodes = html.xpath("//table//table//tr")
-
-    # How many <td> children does the first node have?
-    node_count = nodes.first.children.select { |node| node.elem? }.size
-
-    @mime_types = nodes.map do |node|
-      next if node == nodes.first
-      elems = node.children.select { |n| n.elem? }
-      next if elems.size.zero?
-      raise "size mismatch #{elems.size} != #{node_count}" if node_count != elems.size
-
-      case elems.size
-      when 3
-        subtype_index = 1
-        refnode_index = 2
-      when 4
-        subtype_index = 1
-        refnode_index = 3
-      else
-        raise "Unknown element size."
-      end
-
-      subtype   = elems[subtype_index].content.chomp.strip
-      refnodes  = elems[refnode_index].children.select { |n| n.elem? }.map { |ref|
-        case ref['href']
-        when CONTACT_PEOPLE
-          tag = CGI::unescape($1).chomp.strip
-          if tag == ref.content
-            "[#{ref.content}]"
-          else
-            "[#{ref.content}=#{tag}]"
-          end
-        when RFC_EDITOR, IETF_RFC, IETF_RFC_TOOLS
-          "RFC#$1"
-        when %r{(https?://.*)}
-          "{#{ref.content}=#$1}"
-        else
-          ref
+        Nokogiri::HTML(open(INDEX) { |f| f.read }).xpath('//p/a').each do |tag|
+          href_match = %r{^/assignments/media-types/(.+)/$}.match(tag['href'])
+          next if href_match.nil?
+          type = href_match.captures[0]
+          @types[tag.content] = IANAParser.new(tag.content, type)
         end
-      }
-      refs = refnodes.join(',')
+      end
+
+      attr_reader :types
+    end
+
+    def initialize(name, type)
+      @name = name
+      @type = type
+      @url  = File.join(INDEX, @type)
+    end
+
+    attr_reader :name
+    attr_reader :type
+    attr_reader :url
+    attr_reader :html
+
+    def download(name = nil)
+      @html = Nokogiri::HTML(open(name || @url) { |f| f.read })
+    end
+
+    def save_html
+      File.open("#@name.html", "wb") { |w| w.write @html }
+    end
+
+    def <=>(o)
+      self.name <=> o.name
+    end
+
+    def parse
+      nodes = html.xpath("//table//table//tr")
+
+      # How many <td> children does the first node have?
+      node_count = nodes.first.children.select { |node| node.elem? }.size
+
+      @mime_types = nodes.map do |node|
+        next if node == nodes.first
+        elems = node.children.select { |n| n.elem? }
+        next if elems.size.zero?
+        raise "size mismatch #{elems.size} != #{node_count}" if node_count != elems.size
+
+        case elems.size
+        when 3
+          subtype_index = 1
+          refnode_index = 2
+        when 4
+          subtype_index = 1
+          refnode_index = 3
+        else
+          raise "Unknown element size."
+        end
+
+        subtype   = elems[subtype_index].content.chomp.strip
+        refnodes  = elems[refnode_index].children.select { |n| n.elem? }.map { |ref|
+          case ref['href']
+          when CONTACT_PEOPLE
+            tag = CGI::unescape($1).chomp.strip
+            if tag == ref.content
+            "[#{ref.content}]"
+            else
+            "[#{ref.content}=#{tag}]"
+            end
+          when RFC_EDITOR, IETF_RFC, IETF_RFC_TOOLS
+          "RFC#$1"
+          when %r{(https?://.*)}
+          "{#{ref.content}=#$1}"
+          else
+            ref
+          end
+        }
+        refs = refnodes.join(',')
 
       "#@type/#{subtype} 'IANA,#{refs}"
-    end.compact
+      end.compact
 
-    @mime_types
+      @mime_types
+    end
+
+    def save_text
+      File.open("#@name.txt", "wb") { |w| w.write @mime_types.join("\n") }
+    end
   end
 
-  def save_text
-    File.open("#@name.txt", "wb") { |w| w.write @mime_types.join("\n") }
+  puts "Downloading index of MIME types from #{IANAParser::INDEX}."
+  IANAParser.load_index
+
+  require 'fileutils'
+  FileUtils.mkdir_p destination
+  Dir.chdir destination do
+    IANAParser.types.values.sort.each do |parser|
+      next if parser.name == "example" or parser.name == "mime"
+      puts "Downloading #{parser.name} from #{parser.url}"
+      parser.download
+
+      if :html == save_type || :both == save_type
+        puts "Saving #{parser.name}.html"
+        parser.save_html
+      end
+
+      puts "Parsing #{parser.name} HTML"
+      parser.parse
+
+      if :text == save_type || :both == save_type
+        puts "Saving #{parser.name}.txt"
+        parser.save_text
+      end
+    end
   end
 end
-
-puts "Downloading index of MIME types from #{IANAParser::INDEX}."
-IANAParser.load_index
-
-IANAParser.types.values.sort.each do |parser|
-  next if parser.name == "example" or parser.name == "mime"
-  puts "Downloading #{parser.name} from #{parser.url}"
-  parser.download
-  puts "Saving #{parser.name}.html"
-  parser.save_html
-  puts "Parsing #{parser.name}"
-  parser.parse
-  puts "Saving #{parser.name}.txt"
-  parser.save_text
-end
-
-# foo = IANAParser.types['application']
-# foo.download("application.html")
-# foo.parse
-# foo = IANAParser.types['image']
-# foo.download("image.html")
-# foo.parse
-end

From d1bc6380c1c0e58c7c90dd77045e8187ff8ba4af Mon Sep 17 00:00:00 2001
From: halostatue <austin@zieglers.ca>
Date: Sat, 28 Feb 2009 00:14:00 -0500
Subject: [PATCH 2/3] Updated gitignore to ignore type-lists; removed
 get-latest.rb since it's now in the Rakefile.

---
 .gitignore               |   1 +
 type-lists/get-latest.rb | 133 ---------------------------------------
 2 files changed, 1 insertion(+), 133 deletions(-)
 delete mode 100755 type-lists/get-latest.rb

diff --git a/.gitignore b/.gitignore
index 072a118..c1ee442 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ doc
 pkg
 publish
 coverage
+type-lists
diff --git a/type-lists/get-latest.rb b/type-lists/get-latest.rb
deleted file mode 100755
index 6a9edda..0000000
--- a/type-lists/get-latest.rb
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/usr/bin/ruby -w
-
-require 'rubygems'
-require 'open-uri'
-require 'nokogiri'
-require 'cgi'
-
-class IANAParser
-  include Comparable
-
-  INDEX = %q(http://www.iana.org/assignments/media-types/)
-  CONTACT_PEOPLE = %r{http://www.iana.org/assignments/contact-people.html?#(.*)}
-  RFC_EDITOR = %r{http://www.rfc-editor.org/rfc/rfc(\d+).txt}
-  IETF_RFC = %r{http://www.ietf.org/rfc/rfc(\d+).txt}
-  IETF_RFC_TOOLS = %r{http://tools.ietf.org/html/rfc(\d+)}
-
-  class << self
-    def load_index
-      @types ||= {}
-
-      Nokogiri::HTML(open(INDEX) { |f| f.read }).xpath('//p/a').each do |tag|
-        href_match = %r{^/assignments/media-types/(.+)/$}.match(tag['href'])
-        next if href_match.nil?
-        type = href_match.captures[0]
-        @types[tag.content] = IANAParser.new(tag.content, type)
-      end
-    end
-
-    attr_reader :types
-  end
-
-  def initialize(name, type)
-    @name = name
-    @type = type
-    @url  = File.join(INDEX, @type)
-  end
-
-  attr_reader :name
-  attr_reader :type
-  attr_reader :url
-  attr_reader :html
-
-  def download(name = nil)
-    if name
-      @html = Nokogiri::HTML(open(name) { |f| f.read })
-    else
-      @html = Nokogiri::HTML(open(@url) { |f| f.read })
-    end
-  end
-
-  def save_html
-    File.open("#@name.html", "wb") { |w| w.write @html }
-  end
-
-  def <=>(o)
-    self.name <=> o.name
-  end
-
-  def parse
-    nodes = html.xpath("//table//table//tr")
-
-    # How many <td> children does the first node have?
-    node_count = nodes.first.children.select { |node| node.elem? }.size
-
-    @mime_types = nodes.map do |node|
-      next if node == nodes.first
-      elems = node.children.select { |n| n.elem? }
-      next if elems.size.zero?
-      raise "size mismatch #{elems.size} != #{node_count}" if node_count != elems.size
-
-      case elems.size
-      when 3
-        subtype_index = 1
-        refnode_index = 2
-      when 4
-        subtype_index = 1
-        refnode_index = 3
-      else
-        raise "Unknown element size."
-      end
-
-      subtype   = elems[subtype_index].content.chomp.strip
-      refnodes  = elems[refnode_index].children.select { |n| n.elem? }.map { |ref|
-        case ref['href']
-        when CONTACT_PEOPLE
-          tag = CGI::unescape($1).chomp.strip
-          if tag == ref.content
-            "[#{ref.content}]"
-          else
-            "[#{ref.content}=#{tag}]"
-          end
-        when RFC_EDITOR, IETF_RFC, IETF_RFC_TOOLS
-          "RFC#$1"
-        when %r{(https?://.*)}
-          "{#{ref.content}=#$1}"
-        else
-          ref
-        end
-      }
-      refs = refnodes.join(',')
-
-      "#@type/#{subtype} 'IANA,#{refs}"
-    end.compact
-
-    @mime_types
-  end
-
-  def save_text
-    File.open("#@name.txt", "wb") { |w| w.write @mime_types.join("\n") }
-  end
-end
-
-puts "Downloading index of MIME types from #{IANAParser::INDEX}."
-IANAParser.load_index
-
-IANAParser.types.values.sort.each do |parser|
-  next if parser.name == "example" or parser.name == "mime"
-  puts "Downloading #{parser.name} from #{parser.url}"
-  parser.download
-  puts "Saving #{parser.name}.html"
-  parser.save_html
-  puts "Parsing #{parser.name}"
-  parser.parse
-  puts "Saving #{parser.name}.txt"
-  parser.save_text
-end
-
-# foo = IANAParser.types['application']
-# foo.download("application.html")
-# foo.parse
-# foo = IANAParser.types['image']
-# foo.download("image.html")
-# foo.parse

From 54cd4193077d2decf875d1c0b7c12b15f270d6ed Mon Sep 17 00:00:00 2001
From: halostatue <austin@zieglers.ca>
Date: Sat, 28 Feb 2009 01:55:53 -0500
Subject: [PATCH 3/3] Fixed an encoding issue for Ruby 1.9.1

---
 lib/mime/types.rb.data | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/mime/types.rb.data b/lib/mime/types.rb.data
index c25b239..0ca2cf5 100644
--- a/lib/mime/types.rb.data
+++ b/lib/mime/types.rb.data
@@ -1,4 +1,4 @@
-# vim: ft=ruby enc=utf-8
+# vim: ft=ruby encoding=utf-8
 #--
 # MIME::Types
 # A Ruby implementation of a MIME Types information library. Based in spirit