# frozen_string_literal: true

module URI
  # Regexp-based splitter/parser for URI references following the RFC 3986
  # grammar. The grammar is expressed as a set of constants (HOST, USERINFO,
  # SCHEME, ...) that are composed into two whole-string patterns:
  # RFC3986_URI (a reference with a scheme) and RFC3986_relative_ref
  # (a reference without one).
  class RFC3986_Parser # :nodoc:
    # URI defined in RFC3986

    # Host component: a bracketed IP-literal (IPv6address or IPvFuture),
    # a dotted-quad IPv4address, or a reg-name.
    #
    # NOTE: do not put "#" comments or significant whitespace inside this
    # literal. Its source is embedded into the patterns below after
    # +delete(" \n")+, so a comment would swallow the rest of the pattern.
    #
    # The third IPv6 alternative uses "(?:\h{1,4})?" for the optional leading
    # h16 of RFC 3986's `[ h16 ] "::" 4( h16 ":" ) ls32`. Writing it as
    # "\h{1,4}?" would be a reluctant quantifier that still requires at least
    # one hex digit, which rejects addresses such as "[::1:2:3:4:5:6]".
    HOST = %r[
      (?<IP-literal>\[(?:
          (?<IPv6address>
            (?:\h{1,4}:){6}
            (?<ls32>\h{1,4}:\h{1,4}
            | (?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)
                \.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>)
            )
          | ::(?:\h{1,4}:){5}\g<ls32>
          | (?:\h{1,4})?::(?:\h{1,4}:){4}\g<ls32>
          | (?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>
          | (?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>
          | (?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>
          | (?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>
          | (?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}
          | (?:(?:\h{1,4}:){,6}\h{1,4})?::
          )
        | (?<IPvFuture>v\h++\.[!$&-.0-9:;=A-Z_a-z~]++)
        )\])
    | \g<IPv4address>
    | (?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*+)
    ]x

    # Userinfo component: percent-escapes or the listed literal characters
    # (including ":"), possibly empty.
    USERINFO = /(?:%\h\h|[!$&-.0-9:;=A-Z_a-z~])*+/

    # The following are pattern *sources* (Strings), ready for interpolation.

    # Scheme: a letter followed by letters, digits, "+", "-" or ".".
    SCHEME = %r[[A-Za-z][+\-.0-9A-Za-z]*+].source
    # One path character; "/" and ":" are both allowed.
    SEG = %r[(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/])].source
    # One path character excluding both ":" and "/".
    SEG_NC = %r[(?:%\h\h|[!$&-.0-9;=@A-Z_a-z~])].source
    # Fragment body: path characters plus "?", possibly empty.
    FRAGMENT = %r[(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/?])*+].source

    # Whole-string pattern for a URI reference that has a scheme. The named
    # groups read by #split are: scheme, userinfo, host, port, path-abempty,
    # path-absolute, path-rootless, path-empty, query and fragment.
    RFC3986_URI = %r[\A
    (?<seg>#{SEG}){0}
    (?<URI>
      (?<scheme>#{SCHEME}):
      (?<hier-part>//
        (?<authority>
          (?:(?<userinfo>#{USERINFO.source})@)?
          (?<host>#{HOST.source.delete(" \n")})
          (?::(?<port>\d*+))?
        )
        (?<path-abempty>(?:/\g<seg>*+)?)
      | (?<path-absolute>/((?!/)\g<seg>++)?)
      | (?<path-rootless>(?!/)\g<seg>++)
      | (?<path-empty>)
      )
      (?:\?(?<query>[^\#]*+))?
      (?:\#(?<fragment>#{FRAGMENT}))?
    )\z]x

    # Whole-string pattern for a reference without a scheme. The named groups
    # read by #split are: userinfo, host, port, path-abempty, path-absolute,
    # path-noscheme, path-empty, query and fragment.
    RFC3986_relative_ref = %r[\A
    (?<seg>#{SEG}){0}
    (?<relative-ref>
      (?<relative-part>//
        (?<authority>
          (?:(?<userinfo>#{USERINFO.source})@)?
          (?<host>#{HOST.source.delete(" \n")}(?<!/))?
          (?::(?<port>\d*+))?
        )
        (?<path-abempty>(?:/\g<seg>*+)?)
      | (?<path-absolute>/\g<seg>*+)
      | (?<path-noscheme>#{SEG_NC}++(?:/\g<seg>*+)?)
      | (?<path-empty>)
      )
      (?:\?(?<query>[^\#]*+))?
      (?:\#(?<fragment>#{FRAGMENT}))?
    )\z]x

    # Frozen Hash of frozen, anchored per-component regexps, keyed by
    # component name (see #default_regexp for the keys).
    attr_reader :regexp

    # Builds the per-component regexp table and freezes both the table and
    # every regexp in it.
    def initialize
      @regexp = default_regexp.each_value(&:freeze).freeze
    end

    # Splits +uri+ into a nine-element Array:
    #
    #   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
    #
    # +registry+ is always nil. When the input has a scheme and a rootless
    # path (e.g. "mailto:a@b"), the path and any query are returned together
    # in the +opaque+ slot and userinfo/host/port/path/query are nil.
    #
    # Raises InvalidURIError when +uri+ does not respond to +to_str+,
    # contains non-ASCII characters, or matches neither RFC3986_URI nor
    # RFC3986_relative_ref.
    def split(uri) #:nodoc:
      begin
        uri = uri.to_str
      rescue NoMethodError
        raise InvalidURIError, "bad URI(is not URI?): #{uri.inspect}"
      end
      unless uri.ascii_only?
        raise InvalidURIError, "URI must be ascii only #{uri.dump}"
      end

      if m = RFC3986_URI.match(uri)
        query = m["query"]
        scheme = m["scheme"]
        opaque = m["path-rootless"]
        if opaque
          # The query is appended to the opaque part rather than being
          # reported in its own slot.
          opaque << "?#{query}" if query
          [ scheme,
            nil, # userinfo
            nil, # host
            nil, # port
            nil, # registry
            nil, # path
            opaque,
            nil, # query
            m["fragment"]
          ]
        else # normal
          [ scheme,
            m["userinfo"],
            m["host"],
            m["port"],
            nil, # registry
            (m["path-abempty"] ||
             m["path-absolute"] ||
             m["path-empty"]),
            nil, # opaque
            query,
            m["fragment"]
          ]
        end
      elsif m = RFC3986_relative_ref.match(uri)
        [ nil, # scheme
          m["userinfo"],
          m["host"],
          m["port"],
          nil, # registry
          (m["path-abempty"] ||
           m["path-absolute"] ||
           m["path-noscheme"] ||
           m["path-empty"]),
          nil, # opaque
          m["query"],
          m["fragment"]
        ]
      else
        raise InvalidURIError, "bad URI(is not URI?): #{uri.inspect}"
      end
    end

    # Splits +uri+ with #split and passes the components, followed by this
    # parser, to URI.for. Raises whatever #split raises.
    def parse(uri) # :nodoc:
      URI.for(*self.split(uri), self)
    end

    # Converts the first element of +uris+ with #convert_to_uri and then
    # folds the whole list together by sending +merge+ pairwise.
    def join(*uris) # :nodoc:
      uris[0] = convert_to_uri(uris[0])
      uris.inject :merge
    end

    # Kernel#to_s, captured as an UnboundMethod for use by #inspect.
    @@to_s = Kernel.instance_method(:to_s)
    if @@to_s.respond_to?(:bind_call)
      # Returns the result of Kernel#to_s invoked on this object.
      def inspect
        @@to_s.bind_call(self)
      end
    else
      # Fallback for UnboundMethod implementations without +bind_call+.
      def inspect
        @@to_s.bind(self).call
      end
    end

    private

    # Returns a fresh Hash of whole-string (\A...\z) regexps, one per
    # component. PORT additionally tolerates tab, LF, FF, CR and space
    # characters around the (possibly empty) run of digits.
    def default_regexp # :nodoc:
      {
        SCHEME: %r[\A#{SCHEME}\z]o,
        USERINFO: %r[\A#{USERINFO}\z]o,
        HOST: %r[\A#{HOST}\z]o,
        ABS_PATH: %r[\A/#{SEG}*+\z]o,
        REL_PATH: %r[\A(?!/)#{SEG}++\z]o,
        QUERY: %r[\A(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/?])*+\z],
        FRAGMENT: %r[\A#{FRAGMENT}\z]o,
        OPAQUE: %r[\A(?:[^/].*)?\z],
        PORT: /\A[\x09\x0a\x0c\x0d ]*+\d*[\x09\x0a\x0c\x0d ]*\z/,
      }
    end

    # Returns +uri+ unchanged when it is a URI::Generic, parses it when
    # String.try_convert yields a String, and raises ArgumentError otherwise.
    def convert_to_uri(uri)
      if uri.is_a?(URI::Generic)
        uri
      elsif uri = String.try_convert(uri)
        parse(uri)
      else
        raise ArgumentError,
          "bad argument (expected URI object or URI string)"
      end
    end

  end # class RFC3986_Parser
end # module URI
|