2015-11-10 14:48:14 +03:00
|
|
|
# frozen_string_literal: true
|
2002-12-16 22:06:36 +03:00
|
|
|
require 'uri'
|
|
|
|
require 'stringio'
|
|
|
|
require 'time'
|
|
|
|
|
2019-07-14 11:18:17 +03:00
|
|
|
module URI
  # Opens a resource, which may be a URI.
  #
  # The target is chosen in this order:
  #
  # 1. If +name+ responds to +open+, +open+ is called on it with the
  #    remaining arguments.
  # 2. If +name+ is string-like and begins with <code>(protocol)://</code>,
  #    it is parsed by URI.parse. If the parsed object responds to +open+,
  #    +open+ is called on it with the remaining arguments.
  # 3. Otherwise, Kernel#open is called.
  #
  # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and
  # URI::FTP#open, Kernel#open.
  #
  # URIs and strings that begin with http://, https:// and ftp:// are
  # accepted. In these cases, the opened file object is extended by
  # OpenURI::Meta.
  def self.open(name, *rest, &block)
    return name.open(*rest, &block) if name.respond_to?(:open)

    if name.respond_to?(:to_str) && %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name
      uri = URI.parse(name)
      return uri.open(*rest, &block) if uri.respond_to?(:open)
    end

    # Neither an opener nor an openable URI: defer to Kernel#open with the
    # original arguments and block.
    super
  end
end
|
|
|
|
|
2012-10-20 06:01:23 +04:00
|
|
|
# OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.
|
2006-08-04 22:05:50 +04:00
|
|
|
#
|
2012-10-20 06:01:23 +04:00
|
|
|
# == Example
|
2006-08-04 22:05:50 +04:00
|
|
|
#
|
2011-08-27 02:22:37 +04:00
|
|
|
# It is possible to open an http, https or ftp URL as though it were a file:
|
2006-08-04 22:05:50 +04:00
|
|
|
#
|
2019-10-26 11:29:43 +03:00
|
|
|
# URI.open("http://www.ruby-lang.org/") {|f|
|
2006-08-04 22:05:50 +04:00
|
|
|
# f.each_line {|line| p line}
|
|
|
|
# }
|
|
|
|
#
|
2011-08-27 02:22:37 +04:00
|
|
|
# The opened file has several getter methods for its meta-information, as
|
|
|
|
# follows, since it is extended by OpenURI::Meta.
|
2006-08-04 22:05:50 +04:00
|
|
|
#
|
2019-10-26 11:29:43 +03:00
|
|
|
# URI.open("http://www.ruby-lang.org/en") {|f|
|
2006-08-04 22:05:50 +04:00
|
|
|
# f.each_line {|line| p line}
|
|
|
|
# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
|
|
|
|
# p f.content_type # "text/html"
|
|
|
|
# p f.charset # "iso-8859-1"
|
|
|
|
# p f.content_encoding # []
|
|
|
|
# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
|
|
|
|
# }
|
|
|
|
#
|
|
|
|
# Additional header fields can be specified by an optional hash argument.
|
|
|
|
#
|
2019-10-26 11:29:43 +03:00
|
|
|
# URI.open("http://www.ruby-lang.org/en/",
|
2006-08-04 22:05:50 +04:00
|
|
|
# "User-Agent" => "Ruby/#{RUBY_VERSION}",
|
|
|
|
# "From" => "foo@bar.invalid",
|
|
|
|
# "Referer" => "http://www.ruby-lang.org/") {|f|
|
|
|
|
# # ...
|
|
|
|
# }
|
|
|
|
#
|
|
|
|
# The environment variables such as http_proxy, https_proxy and ftp_proxy
|
2012-10-20 06:01:23 +04:00
|
|
|
# are in effect by default. Here we disable proxy:
|
2006-08-04 22:05:50 +04:00
|
|
|
#
|
2019-10-26 11:29:43 +03:00
|
|
|
# URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
|
2006-08-04 22:05:50 +04:00
|
|
|
# # ...
|
|
|
|
# }
|
|
|
|
#
|
2019-10-26 11:29:43 +03:00
|
|
|
# See OpenURI::OpenRead.open and URI.open for more on available options.
|
2012-10-20 06:01:23 +04:00
|
|
|
#
|
2006-08-04 22:05:50 +04:00
|
|
|
# URI objects can be opened in a similar way.
|
|
|
|
#
|
|
|
|
# uri = URI.parse("http://www.ruby-lang.org/en/")
|
|
|
|
# uri.open {|f|
|
|
|
|
# # ...
|
|
|
|
# }
|
|
|
|
#
|
|
|
|
# URI objects can be read directly. The returned string is also extended by
|
|
|
|
# OpenURI::Meta.
|
|
|
|
#
|
|
|
|
# str = uri.read
|
|
|
|
# p str.base_uri
|
|
|
|
#
|
|
|
|
# Author:: Tanaka Akira <akr@m17n.org>
|
|
|
|
|
2003-02-05 13:44:05 +03:00
|
|
|
module OpenURI
|
2003-11-24 11:02:36 +03:00
|
|
|
# Symbol-keyed options accepted by the open methods. OpenURI.check_options
# only tests whether a Symbol key is present in this table.
Options = {
  proxy: true,
  proxy_http_basic_authentication: true,
  progress_proc: true,
  content_length_proc: true,
  http_basic_authentication: true,
  read_timeout: true,
  open_timeout: true,
  ssl_ca_cert: nil,
  ssl_verify_mode: nil,
  ftp_active_mode: false,
  redirect: true,
  encoding: nil,
}
|
|
|
|
|
|
|
|
# Validates the Symbol keys of +options+ against the Options table.
# Non-Symbol keys (e.g. String header names) are not checked.
# Raises ArgumentError on the first unknown Symbol key.
def OpenURI.check_options(options) # :nodoc:
  options.each do |key, _value|
    next unless key.is_a?(Symbol)
    raise ArgumentError, "unrecognized option: #{key}" unless Options.key?(key)
  end
end
|
|
|
|
|
2003-02-05 13:44:05 +03:00
|
|
|
# Splits the leading Kernel#open-style positional arguments off +rest+.
#
# A leading String or Integer is taken as +mode+; an Integer directly
# after it is taken as +perm+. Returns <tt>[mode, perm, remaining]</tt>,
# with +nil+ for the parts that are absent.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = perm = nil
  case rest.first
  when String, Integer
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end
|
|
|
|
|
|
|
|
# Opens +name+ for reading and returns the resulting IO-like object, or
# yields it to the block and closes it afterwards.
#
# +name+ is a URI::Generic or anything URI.parse accepts. +rest+ is
# <tt>[mode [, perm]] [, options]</tt>: +perm+ is discarded, +options+ must
# be a Hash and is validated with OpenURI.check_options.
#
# Accepted modes are +nil+, <tt>"r"</tt>, <tt>"rb"</tt> (each optionally
# followed by <tt>:encoding</tt>) and File::RDONLY.
#
# Raises ArgumentError for extra arguments, for an unknown encoding, for an
# encoding given both in the mode and in +options+, and for any mode that
# is not read-only.
def OpenURI.open_uri(name, *rest) # :nodoc:
  uri = URI::Generic === name ? name : URI.parse(name)
  mode, _, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = rest.shift if Hash === rest.first
  raise ArgumentError, "extra arguments" unless rest.empty?
  options ||= {}
  OpenURI.check_options(options)

  encoding = nil
  # Only a String mode can carry an encoding suffix. Matching an Integer
  # mode against the regexp would raise TypeError before the read-only
  # check below could accept File::RDONLY or reject other flags.
  if String === mode && /\Arb?(?:\Z|:([^:]+))/ =~ mode
    encoding_name = $1
    if encoding_name
      Encoding.find(encoding_name) # raises ArgumentError for an unknown name
      encoding = encoding_name
    end
    mode = nil
  end
  if options.has_key? :encoding
    raise ArgumentError, "encoding specified twice" unless encoding.nil?
    encoding = Encoding.find(options[:encoding])
  end

  unless mode == nil ||
         mode == 'r' || mode == 'rb' ||
         mode == File::RDONLY
    raise ArgumentError, "invalid access mode #{mode} (#{uri.class} resource is read only.)"
  end

  io = open_loop(uri, options)
  io.set_encoding(encoding) if encoding
  return io unless block_given?

  begin
    yield io
  ensure
    if io.respond_to? :close!
      io.close! # Tempfile
    else
      io.close if !io.closed?
    end
  end
end
|
|
|
|
|
2003-02-05 13:44:05 +03:00
|
|
|
# Fetches +uri+ into a fresh Buffer, following redirects signalled by a
# <tt>throw :open_uri_redirect</tt> from +buffer_open+, and returns the
# buffer's IO with +base_uri+ set to the final URI.
#
# Proxy selection: +:proxy_http_basic_authentication+ and +:proxy+ are
# mutually exclusive (ArgumentError if both keys are present). With neither
# key the proxy setting is +true+, which asks the URI for its proxy via
# +find_proxy+.
#
# Raises HTTPRedirect when a redirect arrives and +:redirect+ is falsy, and
# RuntimeError for a forbidden redirect or a redirect loop.
def OpenURI.open_loop(uri, options) # :nodoc:
  authenticated_proxy = options.include?(:proxy_http_basic_authentication)
  plain_proxy = options.include?(:proxy)
  if authenticated_proxy && plain_proxy
    raise ArgumentError, "multiple proxy options specified"
  end

  proxy_user = proxy_pass = nil
  if authenticated_proxy
    opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
    proxy_user = proxy_user.to_str
    proxy_pass = proxy_pass.to_str
    if opt_proxy == true
      raise ArgumentError, "Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}"
    end
  elsif plain_proxy
    opt_proxy = options.fetch(:proxy)
  else
    opt_proxy = true
  end

  # find_proxy maps a URI to nil (direct connection) or to
  # [proxy_uri, proxy_user, proxy_pass].
  find_proxy =
    case opt_proxy
    when true
      lambda { |u| (pxy = u.find_proxy) ? [pxy, nil, nil] : nil }
    when nil, false
      lambda { |_u| nil }
    when String
      opt_proxy = URI.parse(opt_proxy)
      lambda { |_u| [opt_proxy, proxy_user, proxy_pass] }
    when URI::Generic
      lambda { |_u| [opt_proxy, proxy_user, proxy_pass] }
    else
      raise ArgumentError, "Invalid proxy option: #{opt_proxy}"
    end

  visited = {}
  buf = nil
  # Each pass fetches into a new Buffer; the catch yields the redirect
  # target, or nil once a fetch completes without redirecting.
  while (redirect = catch(:open_uri_redirect) {
           buf = Buffer.new
           uri.buffer_open(buf, find_proxy.call(uri), options)
           nil
         })
    # Although it violates RFC2616, Location: field may have relative
    # URI. It is converted to absolute URI using uri as a base URI.
    redirect = uri + redirect if redirect.relative?

    unless options.fetch(:redirect, true)
      raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
    end
    unless OpenURI.redirectable?(uri, redirect)
      raise "redirection forbidden: #{uri} -> #{redirect}"
    end

    if options.include? :http_basic_authentication
      # send authentication only for the URI directly specified.
      options = options.dup
      options.delete :http_basic_authentication
    end

    uri = redirect
    target_key = uri.to_s
    raise "HTTP redirection loop: #{uri}" if visited.include? target_key
    visited[target_key] = true
  end

  io = buf.io
  io.base_uri = uri
  io
end
|
|
|
|
|
2003-11-24 17:36:18 +03:00
|
|
|
# Decides whether a redirect from +uri1+ to +uri2+ may be followed.
# Returns a truthy value when it may, +nil+ otherwise.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  # This test is intended to forbid a redirection from http://... to
  # file:///etc/passwd, file:///dev/zero, etc.  CVE-2011-1521
  # https to http redirect is also forbidden intentionally.
  # It avoids sending secure cookie or referer by non-secure HTTP protocol.
  # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
  # However this is ad hoc.  It should be extensible/configurable.
  source_scheme = uri1.scheme
  target_scheme = uri2.scheme
  return true if source_scheme.downcase == target_scheme.downcase

  /\A(?:http|ftp)\z/i =~ source_scheme && /\A(?:https?|ftp)\z/i =~ target_scheme
end
|
|
|
|
|
2005-02-12 08:01:18 +03:00
|
|
|
# Performs one HTTP GET for +target+ and streams the response body into
# +buf+.
#
# +proxy+ is +nil+ or <tt>[proxy_uri, proxy_user, proxy_pass]</tt>. When
# +target+ is not a URI::HTTP, the request is sent to the proxy with the
# full target URI as the request URI (FTP over HTTP proxy).
#
# String keys of +options+ become request header fields; the Symbol keys
# read here are +:ssl_verify_mode+, +:ssl_ca_cert+, +:read_timeout+,
# +:open_timeout+, +:http_basic_authentication+, +:content_length_proc+ and
# +:progress_proc+.
#
# On a 301/302/303/307 response the Location URI is thrown with
# <tt>:open_uri_redirect</tt>. Any other non-success response raises
# OpenURI::HTTPError. Also raises RuntimeError for a proxy URI whose class
# is not exactly URI::HTTP and ArgumentError when +target+ has userinfo.
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
  proxy_uri, proxy_user, proxy_pass = proxy if proxy
  if proxy && proxy_uri.class != URI::HTTP
    raise "Non-HTTP proxy URI: #{proxy_uri}"
  end

  raise ArgumentError, "userinfo not supported. [RFC3986]" if target.userinfo

  header = options.each_with_object({}) do |(key, value), fields|
    fields[key] = value if String === key
  end

  require 'net/http'
  http_class = Net::HTTP
  if URI::HTTP === target
    # HTTP or HTTPS
    if proxy
      # Credentials embedded in the proxy URI are used only when no
      # complete user/password pair was passed in.
      if !(proxy_user && proxy_pass) && proxy_uri.userinfo
        proxy_user, proxy_pass = proxy_uri.userinfo.split(':')
      end
      proxy_args = [proxy_uri.hostname, proxy_uri.port]
      proxy_args.push(proxy_user, proxy_pass) if proxy_user && proxy_pass
      http_class = Net::HTTP::Proxy(*proxy_args)
    end
    target_host = target.hostname
    target_port = target.port
    request_uri = target.request_uri
  else
    # FTP over HTTP proxy
    target_host = proxy_uri.hostname
    target_port = proxy_uri.port
    request_uri = target.to_s
    if proxy_user && proxy_pass
      credentials = ["#{proxy_user}:#{proxy_pass}"].pack('m0')
      header["Proxy-Authorization"] = 'Basic ' + credentials
    end
  end

  # Without a proxy, an explicit nil proxy address is passed as the third
  # argument to the constructor.
  connect_args = [target_host, target_port]
  connect_args << nil unless proxy
  http = http_class.new(*connect_args)

  if target.class == URI::HTTPS
    require 'net/https'
    http.use_ssl = true
    http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
    store = OpenSSL::X509::Store.new
    ca_certs = options[:ssl_ca_cert]
    if ca_certs
      Array(ca_certs).each do |cert|
        File.directory?(cert) ? store.add_path(cert) : store.add_file(cert)
      end
    else
      store.set_default_paths
    end
    http.cert_store = store
  end
  http.read_timeout = options[:read_timeout] if options.include? :read_timeout
  http.open_timeout = options[:open_timeout] if options.include? :open_timeout

  resp = nil
  http.start do
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) do |response|
      resp = response
      # The two callbacks are invoked for successful responses only.
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        expected = resp.key?('Content-Length') ? resp['Content-Length'].to_i : nil
        options[:content_length_proc].call(expected)
      end
      resp.read_body do |chunk|
        buf << chunk
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
        chunk.clear
      end
    end
  end

  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  resp.each_name { |field| io.meta_add_field2 field, resp.get_fields(field) }

  case resp
  when Net::HTTPSuccess
    nil
  when Net::HTTPMovedPermanently, # 301
       Net::HTTPFound, # 302
       Net::HTTPSeeOther, # 303
       Net::HTTPTemporaryRedirect # 307
    begin
      loc_uri = URI.parse(resp['location'])
    rescue URI::InvalidURIError
      raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
    end
    throw :open_uri_redirect, loc_uri
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end
|
|
|
|
|
2002-12-16 22:06:36 +03:00
|
|
|
# Error carrying the IO-like object that holds the response it was raised
# for.
class HTTPError < StandardError
  # The IO-like object given at construction.
  attr_reader :io

  def initialize(message, io)
    super(message)
    @io = io
  end
end
|
|
|
|
|
2012-10-20 06:01:23 +04:00
|
|
|
# Raised on redirection,
# only occurs when +redirect+ option for HTTP is +false+.
class HTTPRedirect < HTTPError
  # The URI given at construction.
  attr_reader :uri

  def initialize(message, io, uri)
    super(message, io)
    @uri = uri
  end
end
|
|
|
|
|
2012-10-20 06:01:23 +04:00
|
|
|
# Accumulates downloaded data, starting in memory (StringIO) and moving to
# a Tempfile once more than StringMax bytes have been appended.
class Buffer # :nodoc: all
  # In-memory limit in bytes; exceeding it moves the data to a Tempfile.
  StringMax = 10240

  # Number of bytes appended so far.
  attr_reader :size

  def initialize
    @io = StringIO.new
    @size = 0
  end

  # Appends +str+ and updates #size.
  #
  # The size is counted with String#bytesize rather than String#length:
  # the count is passed to the :progress_proc callback as a byte count and
  # compared with the byte limit StringMax, and +length+ would under-count
  # a string tagged with a multibyte encoding. For binary chunks the two
  # are identical.
  def <<(str)
    @io << str
    @size += str.bytesize
    if StringIO === @io && StringMax < @size
      require 'tempfile'
      io = Tempfile.new('open-uri')
      io.binmode
      # Carry the meta-information over if it was already attached.
      Meta.init io, @io if Meta === @io
      io << @io.string
      @io = io
    end
  end

  # Returns the underlying IO, extending it with Meta on first access.
  def io
    Meta.init @io unless Meta === @io
    @io
  end
end
|
|
|
|
|
|
|
|
# Mixin for holding meta-information.
module Meta
  # Extends +obj+ with Meta and resets its meta-information. When +src+ is
  # given, its status, base URI and header fields are copied onto +obj+.
  def Meta.init(obj, src=nil) # :nodoc:
    obj.extend Meta
    obj.instance_variable_set(:@base_uri, nil)
    obj.instance_variable_set(:@meta, {})  # name to string.  legacy.
    obj.instance_variable_set(:@metas, {}) # name to array of strings.
    return unless src

    obj.status = src.status
    obj.base_uri = src.base_uri
    src.metas.each_pair { |name, values| obj.meta_add_field2(name, values) }
  end

  # returns an Array that consists of status code and message.
  attr_accessor :status

  # returns a URI that is the base of relative URIs in the data.
  # It may differ from the URI supplied by a user due to redirection.
  attr_accessor :base_uri

  # returns a Hash that represents header fields.
  # The Hash keys are downcased for canonicalization.
  # The Hash values are a field body.
  # If there are multiple fields with the same field name,
  # the field values are concatenated with a comma.
  attr_reader :meta

  # returns a Hash that represents header fields.
  # The Hash keys are downcased for canonicalization.
  # The Hash values are arrays of field values.
  attr_reader :metas

  # Applies the encoding named by #charset to the receiver, using binary
  # (ASCII-8BIT) when there is no charset or its name is unknown.
  def meta_setup_encoding # :nodoc:
    label = self.charset
    enc =
      begin
        Encoding.find(label) if label
      rescue ArgumentError
        nil
      end
    enc ||= Encoding::ASCII_8BIT

    if self.respond_to? :force_encoding
      self.force_encoding(enc)
    elsif self.respond_to? :string
      self.string.force_encoding(enc)
    else # Tempfile
      self.set_encoding enc
    end
  end

  # Stores +values+ (an array of strings) under the downcased field +name+
  # in both #metas and #meta, then re-derives the encoding if the field is
  # Content-Type.
  def meta_add_field2(name, values) # :nodoc:
    key = name.downcase
    @metas[key] = values
    @meta[key] = values.join(', ')
    meta_setup_encoding if key == 'content-type'
  end

  # Single-value convenience form of #meta_add_field2.
  def meta_add_field(name, value) # :nodoc:
    meta_add_field2(name, [value])
  end

  # returns a Time that represents the Last-Modified field,
  # or nil when the field is absent.
  def last_modified
    vs = @metas['last-modified']
    vs ? Time.httpdate(vs.join(', ')) : nil
  end

  # :stopdoc:
  RE_LWS = /[\r\n\t ]+/n
  RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
  RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
  RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
  # :startdoc:

  # Parses the Content-Type field into
  # <tt>["type/subtype", [attribute, value], ...]</tt> with the type,
  # subtype and attribute names downcased. Returns nil when the field is
  # absent or does not match the expected syntax.
  def content_type_parse # :nodoc:
    vs = @metas['content-type']
    # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
    return nil unless vs && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ vs.join(', ')

    type, subtype, raw_parameters = $1.downcase, $2.downcase, $3
    parameters = raw_parameters.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no).map do |att, val, qval|
      if qval
        # Drop the surrounding quotes and resolve backslash escapes.
        val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& }
      end
      [att.downcase, val]
    end
    ["#{type}/#{subtype}", *parameters]
  end

  # returns "type/subtype" which is MIME Content-Type.
  # It is downcased for canonicalization.
  # Content-Type parameters are stripped.
  def content_type
    parsed = content_type_parse
    (parsed && parsed.first) || 'application/octet-stream'
  end

  # returns a charset parameter in Content-Type field.
  # It is downcased for canonicalization.
  #
  # If charset parameter is not given but a block is given,
  # the block is called and its result is returned.
  # It can be used to guess charset.
  #
  # If neither the charset parameter nor a block is given,
  # nil is returned, except for text types.
  # In that case, "utf-8" is returned as defined by RFC6838 4.2.1
  def charset
    media_type, *parameters = content_type_parse
    explicit = parameters.assoc('charset')
    return explicit.last.downcase if explicit
    return yield if block_given?
    return "utf-8" if media_type && %r{\Atext/} =~ media_type # RFC6838 4.2.1

    nil
  end

  # Returns a list of encodings in Content-Encoding field as an array of
  # strings.
  #
  # The encodings are downcased for canonicalization.
  def content_encoding
    vs = @metas['content-encoding']
    return [] unless vs

    joined = vs.join(', ')
    return [] unless %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ joined

    joined.scan(RE_TOKEN).map { |content_coding| content_coding.downcase }
  end
end
|
|
|
|
|
2003-11-13 14:39:16 +03:00
|
|
|
# Mixin for HTTP and FTP URIs.
|
2002-12-16 22:06:36 +03:00
|
|
|
module OpenRead
|
2003-11-24 11:02:36 +03:00
|
|
|
# OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
|
|
|
|
#
|
|
|
|
# OpenURI::OpenRead#open takes optional 3 arguments as:
|
|
|
|
#
|
2011-08-27 03:11:38 +04:00
|
|
|
# OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2011-08-27 03:11:38 +04:00
|
|
|
# OpenURI::OpenRead#open returns an IO-like object if block is not given.
|
|
|
|
# Otherwise it yields the IO object and returns the value of the block.
|
|
|
|
# The IO object is extended with OpenURI::Meta.
|
|
|
|
#
|
|
|
|
# +mode+ and +perm+ are the same as Kernel#open.
|
|
|
|
#
|
|
|
|
# However, +mode+ must be read mode because OpenURI::OpenRead#open doesn't
|
2003-11-24 11:02:36 +03:00
|
|
|
# support write mode (yet).
|
2011-08-27 03:11:38 +04:00
|
|
|
# Also +perm+ is ignored because it is meaningful only for file creation.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2011-08-27 03:11:38 +04:00
|
|
|
# +options+ must be a hash.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2011-08-27 03:11:38 +04:00
|
|
|
# Each option with a string key specifies an extra header field for HTTP.
|
|
|
|
# I.e., it is ignored for FTP without HTTP proxy.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2011-08-27 02:22:37 +04:00
|
|
|
# The hash may include other options, where keys are symbols:
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# [:proxy]
|
|
|
|
# Synopsis:
|
|
|
|
# :proxy => "http://proxy.foo.com:8000/"
|
|
|
|
# :proxy => URI.parse("http://proxy.foo.com:8000/")
|
|
|
|
# :proxy => true
|
|
|
|
# :proxy => false
|
|
|
|
# :proxy => nil
|
2008-09-14 19:18:53 +04:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# If :proxy option is specified, the value should be String, URI,
|
|
|
|
# boolean or nil.
|
2011-08-27 03:11:38 +04:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# When String or URI is given, it is treated as proxy URI.
|
2011-08-27 03:11:38 +04:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# When true is given or the option itself is not specified,
|
|
|
|
# environment variable `scheme_proxy' is examined.
|
2005-02-12 08:38:44 +03:00
|
|
|
# `scheme' is replaced by `http', `https' or `ftp'.
|
2011-08-27 03:11:38 +04:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# When false or nil is given, the environment variables are ignored and
|
|
|
|
# connection will be made to a server directly.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-09-29 05:18:14 +04:00
|
|
|
# [:proxy_http_basic_authentication]
|
|
|
|
# Synopsis:
|
2011-08-27 03:11:38 +04:00
|
|
|
# :proxy_http_basic_authentication =>
|
|
|
|
# ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"]
|
|
|
|
# :proxy_http_basic_authentication =>
|
|
|
|
# [URI.parse("http://proxy.foo.com:8000/"),
|
|
|
|
# "proxy-user", "proxy-password"]
|
2008-09-14 19:18:53 +04:00
|
|
|
#
|
2011-08-27 03:11:38 +04:00
|
|
|
# If :proxy_http_basic_authentication option is specified, the value should be an Array with 3
|
|
|
|
# elements. It should contain a proxy URI, a proxy user name and a proxy
|
|
|
|
# password. The proxy URI should be a String, an URI or nil. The proxy
|
|
|
|
# user name and password should be a String.
|
2005-09-29 05:18:14 +04:00
|
|
|
#
|
|
|
|
# If nil is given for the proxy URI, this option is just ignored.
|
|
|
|
#
|
2008-09-14 19:18:53 +04:00
|
|
|
# If both :proxy and :proxy_http_basic_authentication are specified,
|
2005-09-30 20:48:46 +04:00
|
|
|
# ArgumentError is raised.
|
2005-09-29 05:18:14 +04:00
|
|
|
#
|
2005-02-11 05:47:11 +03:00
|
|
|
# [:http_basic_authentication]
|
|
|
|
# Synopsis:
|
|
|
|
# :http_basic_authentication=>[user, password]
|
|
|
|
#
|
|
|
|
# If :http_basic_authentication is specified,
|
|
|
|
# the value should be an array which contains 2 strings:
|
|
|
|
# username and password.
|
|
|
|
# It is used for HTTP Basic authentication defined by RFC 2617.
|
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# [:content_length_proc]
|
|
|
|
# Synopsis:
|
|
|
|
# :content_length_proc => lambda {|content_length| ... }
|
2008-09-14 19:18:53 +04:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# If :content_length_proc option is specified, the option value procedure
|
|
|
|
# is called before actual transfer is started.
|
2011-08-27 02:22:37 +04:00
|
|
|
# It takes one argument, which is expected content length in bytes.
|
2008-09-14 19:18:53 +04:00
|
|
|
#
|
2016-10-20 10:19:58 +03:00
|
|
|
# If two or more transfers are performed by HTTP redirection, the
|
|
|
|
# procedure is called only once for the last transfer.
|
2008-09-14 19:18:53 +04:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# When expected content length is unknown, the procedure is called with
|
2011-08-27 02:22:37 +04:00
|
|
|
# nil. This happens when the HTTP response has no Content-Length header.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# [:progress_proc]
|
|
|
|
# Synopsis:
|
|
|
|
# :progress_proc => lambda {|size| ...}
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# If :progress_proc option is specified, the proc is called with one
|
|
|
|
# argument each time when `open' gets content fragment from network.
|
2011-08-27 03:11:38 +04:00
|
|
|
# The argument +size+ is the accumulated transferred size in bytes.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# If two or more transfers are performed by HTTP redirection, the
# procedure is called only for the last transfer.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# :progress_proc and :content_length_proc are intended to be used for
|
|
|
|
# progress bar.
|
|
|
|
# For example, it can be implemented as follows using Ruby/ProgressBar.
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-02-08 06:06:37 +03:00
|
|
|
# pbar = nil
|
|
|
|
# open("http://...",
|
|
|
|
# :content_length_proc => lambda {|t|
|
|
|
|
# if t && 0 < t
|
|
|
|
# pbar = ProgressBar.new("...", t)
|
|
|
|
# pbar.file_transfer_mode
|
|
|
|
# end
|
|
|
|
# },
|
|
|
|
# :progress_proc => lambda {|s|
|
|
|
|
# pbar.set s if pbar
|
|
|
|
# }) {|f| ... }
|
2003-11-24 11:02:36 +03:00
|
|
|
#
|
2005-09-15 08:56:25 +04:00
|
|
|
# [:read_timeout]
|
|
|
|
# Synopsis:
|
|
|
|
# :read_timeout=>nil (no timeout)
|
|
|
|
# :read_timeout=>10 (10 seconds)
|
|
|
|
#
|
|
|
|
# :read_timeout option specifies a timeout of read for http connections.
|
|
|
|
#
|
2014-10-10 23:15:56 +04:00
|
|
|
# [:open_timeout]
|
|
|
|
# Synopsis:
|
|
|
|
# :open_timeout=>nil (no timeout)
|
|
|
|
# :open_timeout=>10 (10 seconds)
|
|
|
|
#
|
|
|
|
# :open_timeout option specifies a timeout of open for http connections.
|
|
|
|
#
|
2006-02-19 18:17:16 +03:00
|
|
|
# [:ssl_ca_cert]
|
|
|
|
# Synopsis:
|
2014-12-23 18:06:40 +03:00
|
|
|
# :ssl_ca_cert=>filename or an Array of filenames
|
2006-02-19 18:17:16 +03:00
|
|
|
#
|
|
|
|
# :ssl_ca_cert is used to specify CA certificate for SSL.
|
|
|
|
# If it is given, default certificates are not used.
|
|
|
|
#
|
|
|
|
# [:ssl_verify_mode]
|
|
|
|
# Synopsis:
|
|
|
|
# :ssl_verify_mode=>mode
|
|
|
|
#
|
|
|
|
# :ssl_verify_mode is used to specify openssl verify mode.
|
|
|
|
#
|
2007-08-29 13:38:36 +04:00
|
|
|
# [:ftp_active_mode]
|
|
|
|
# Synopsis:
|
|
|
|
# :ftp_active_mode=>bool
|
|
|
|
#
|
2011-08-27 03:11:38 +04:00
|
|
|
# <tt>:ftp_active_mode => true</tt> is used to make ftp active mode.
|
|
|
|
# Ruby 1.9 uses passive mode by default.
|
|
|
|
# Note that the active mode is default in Ruby 1.8 or prior.
|
2007-08-29 13:38:36 +04:00
|
|
|
#
|
2007-10-28 15:55:51 +03:00
|
|
|
# [:redirect]
|
|
|
|
# Synopsis:
|
|
|
|
# :redirect=>bool
|
|
|
|
#
|
2011-08-27 03:11:38 +04:00
|
|
|
# +:redirect+ is true by default. <tt>:redirect => false</tt> is used to
|
|
|
|
# disable all HTTP redirects.
|
|
|
|
#
|
|
|
|
# When redirects are disabled, an OpenURI::HTTPRedirect exception is raised on redirection.
|
|
|
|
# Using +true+ also means that redirections between http and ftp are
|
|
|
|
# permitted.
|
2007-10-28 15:55:51 +03:00
|
|
|
#
|
2003-02-05 13:44:05 +03:00
|
|
|
def open(*rest, &block)
  # Hand off to OpenURI.open_uri with this object as the resource to open;
  # every remaining argument and the optional block are forwarded untouched.
  OpenURI.open_uri(self, *rest, &block)
end
|
|
|
|
|
2020-04-07 07:59:38 +03:00
|
|
|
# OpenURI::OpenRead#read([ options ]) opens the resource referenced by self,
# reads it to the end and returns the content as a string.
# The returned string is extended with OpenURI::Meta.
# The argument +options+ is the same as for OpenURI::OpenRead#open.
def read(options={})
  self.open(options) do |io|
    content = io.read
    # Meta.init receives both the content and the opened object it was read
    # from; its return value is ignored so the block yields the content itself.
    Meta.init(content, io)
    content
  end
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
module URI
|
|
|
|
class HTTP
  # Mix in the generic #open / #read entry points.
  include OpenURI::OpenRead

  # Hook used to fill +buf+ for this URI: simply delegates to
  # OpenURI.open_http, passing self as the target together with the
  # +proxy+ and +options+ it was given.
  def buffer_open(buf, proxy, options) # :nodoc:
    OpenURI.open_http(buf, self, proxy, options)
  end
end
|
|
|
|
|
|
|
|
class FTP
  # Retrieves the file named by this FTP URI and appends its content to +buf+.
  #
  # buf::     receives each downloaded chunk via <tt><<</tt>; its +size+ is
  #           reported to the progress callback and <tt>buf.io</tt> is rewound
  #           once the transfer has finished.
  # proxy::   when truthy, the request is delegated to OpenURI.open_http and
  #           nothing else in this method runs.
  # options:: the keys read here are +:ftp_active_mode+,
  #           +:content_length_proc+ and +:progress_proc+.
  #
  # Raises ArgumentError when the path has no filename, when a path component
  # contains CR or LF, or when the typecode is not one of a, i or d.
  # Aborts the process with a message when net/ftp cannot be loaded.
  #
  # The FTP connection is always closed, including when login, a directory
  # change, the transfer or one of the callbacks raises.
  def buffer_open(buf, proxy, options) # :nodoc:
    if proxy
      OpenURI.open_http(buf, self, proxy, options)
      return
    end

    # net/ftp is loaded lazily so it is only needed when an ftp URI is opened.
    begin
      require 'net/ftp'
    rescue LoadError
      abort "net/ftp is not found. You may need to `gem install net-ftp` to install net/ftp."
    end

    path = self.path
    path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it.
    # A limit of -1 keeps trailing empty components.
    directories = path.split(%r{/}, -1)
    # Percent-decode every component in place.
    directories.each {|d|
      d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
    }
    # The last component is the file to retrieve; the rest are directories.
    filename = directories.pop
    raise ArgumentError, "no filename: #{self.inspect}" unless filename

    # Components are interpolated into FTP command lines below, so a CR or LF
    # inside one of them must be rejected.
    directories.each {|d|
      if /[\r\n]/ =~ d
        raise ArgumentError, "invalid directory: #{d.inspect}"
      end
    }
    if /[\r\n]/ =~ filename
      raise ArgumentError, "invalid filename: #{filename.inspect}"
    end
    typecode = self.typecode
    if typecode && /\A[aid]\z/ !~ typecode
      raise ArgumentError, "invalid typecode: #{typecode.inspect}"
    end

    # The access sequence is defined by RFC 1738
    ftp = Net::FTP.new
    begin
      ftp.connect(self.hostname, self.port)
      ftp.passive = !options[:ftp_active_mode]
      # todo: extract user/passwd from .netrc.
      user = 'anonymous'
      passwd = nil
      if self.userinfo
        # Split on the first colon only, so a password that itself contains
        # ':' is passed through intact.
        user, passwd = self.userinfo.split(':', 2)
        # "user:" keeps meaning "no password given", as it did before.
        passwd = nil if passwd && passwd.empty?
      end
      ftp.login(user, passwd)
      # Change directory one component at a time.
      directories.each {|cwd|
        ftp.voidcmd("CWD #{cwd}")
      }
      if typecode
        # xxx: typecode D is not handled.
        ftp.voidcmd("TYPE #{typecode.upcase}")
      end
      if options[:content_length_proc]
        options[:content_length_proc].call(ftp.size(filename))
      end
      ftp.retrbinary("RETR #{filename}", 4096) { |str|
        buf << str
        options[:progress_proc].call(buf.size) if options[:progress_proc]
      }
    ensure
      # Release the connection on every exit path, not just on success.
      ftp.close
    end
    buf.io.rewind
  end

  include OpenURI::OpenRead
end
|
|
|
|
end
|