зеркало из https://github.com/github/ruby.git
[ruby/uri] Refactor RFC3986 regexps to make more readable
https://github.com/ruby/uri/commit/3dfa19e920
This commit is contained in:
Родитель
3168f618cb
Коммит
57c5b0a980
|
@ -1,9 +1,69 @@
|
|||
# frozen_string_literal: false
|
||||
# frozen_string_literal: true
|
||||
module URI
|
||||
class RFC3986_Parser # :nodoc:
|
||||
# URI defined in RFC3986
|
||||
RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*+):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*+)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h++\.[!$&-.0-;=A-Z_a-z~]++))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*+))(?::(?<port>\d*+))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*+))*+)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])++)(?:\/\g<segment>)*+)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*+)|(?<path-empty>))(?:\?(?<query>[^#]*+))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*+))?)\z/
|
||||
RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*+)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h++\.[!$&-.0-;=A-Z_a-z~]++))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])++))?(?::(?<port>\d*+))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*+))*+)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])++)(?:\/\g<segment>)*+)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])++)(?:\/\g<segment>)*+)|(?<path-empty>))(?:\?(?<query>[^#]*+))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*+))?)\z/
|
||||
# Host component of an authority (RFC 3986, section 3.2.2): a bracketed
# IP-literal (IPv6address or IPvFuture), a dotted-quad IPv4address, or a
# reg-name. The pattern is unanchored; callers add \A / \z as needed.
#
# Layout uses spaces and newlines only. AUTHORITY embeds this pattern via
# HOST.source.delete(" \n"), so the body must not contain "#" comments
# (they would swallow the rest of the pattern once newlines are removed).
#
# The third IPv6address alternative is the RFC's
#   [ h16 ] "::" 4( h16 ":" ) ls32
# Its leading h16 is wrapped in an optional group: a bare `\h{1,4}?` is a
# lazy quantifier that still requires one hex digit and would therefore
# reject addresses such as "[::1:2:3:4:5:6]".
HOST = %r[
  (?<IP-literal>\[(?:
      (?<IPv6address>
        (?:\h{1,4}:){6}
        (?<ls32>\h{1,4}:\h{1,4}
        | (?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)
            \.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>)
        )
      | ::(?:\h{1,4}:){5}\g<ls32>
      | (?:\h{1,4})?::(?:\h{1,4}:){4}\g<ls32>
      | (?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>
      | (?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>
      | (?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>
      | (?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>
      | (?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}
      | (?:(?:\h{1,4}:){,6}\h{1,4})?::
      )
    | (?<IPvFuture>v\h++\.[!$&-.0-9:;=A-Z_a-z~]++)
    )\])
| \g<IPv4address>
| (?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*+)
]x
|
||||
|
||||
# Userinfo part of an authority: a possibly empty run of percent-encoded
# octets and the listed literal characters (":" included). Unanchored; the
# possessive *+ never gives characters back once consumed.
USERINFO = %r[(?:%\h\h|[!$&-.0-9:;=A-Z_a-z~])*+]
|
||||
# Authority component: an optional "userinfo@", a host, and an optional
# ":port" (digits may be absent). Exposes the named groups userinfo, host
# and port, plus whatever groups HOST defines.
#
# USERINFO is embedded through its source; HOST is embedded through its
# source with spaces and newlines stripped. Nothing but those three parts
# belongs inside the literal: a stray "|" line here would add an empty
# alternation branch and let the whole pattern match the empty string.
AUTHORITY = %r[
  (?:(?<userinfo>#{USERINFO.source})@)?
  (?<host>#{HOST.source.delete(" \n")})
  (?::(?<port>\d*+))?
]x
|
||||
|
||||
# Source text of the scheme pattern: one ASCII letter followed by any number
# of letters, digits, "+", "-" or "." (possessive). Kept as a String so it
# can be interpolated into larger patterns.
SCHEME = %r{[A-Za-z][+\-.0-9A-Za-z]*+}.source
|
||||
# Source text for a single path character: a percent-encoded octet or one of
# the listed literals. "/" is part of the class, so repeating this pattern
# spans a whole multi-segment path rather than one segment.
SEG = %r{(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/])}.source
|
||||
# Source text of the fragment pattern: a possibly empty, possessive run of
# percent-encoded octets and the listed literals, which include "/" and "?".
# Kept as a String for interpolation into larger patterns.
FRAGMENT = %r{(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/?])*+}.source
|
||||
|
||||
# Whole-string pattern for a URI with a scheme:
#   scheme ":" hier-part [ "?" query ] [ "#" fragment ]
#
# Named groups: URI, scheme, hier-part, authority (with the groups that
# AUTHORITY defines), path-abempty, path-absolute, path-rootless,
# path-empty, query, fragment.
#
# (?<seg>...){0} only declares the "seg" subpattern (zero repetitions, so it
# consumes nothing); the path alternatives invoke it with \g<seg>.
#
# Because SEG also matches "/", the path alternatives need explicit guards:
# * path-absolute is "/" optionally followed by a run that does not start
#   with "/", so "//..." can only be read as an authority.
# * path-rootless must not start with "/" (negative lookahead (?!/)).
RFC3986_URI = %r[\A
  (?<seg>#{SEG}){0}
  (?<URI>
    (?<scheme>#{SCHEME}):
    (?<hier-part>//
      (?<authority>#{AUTHORITY})
      (?<path-abempty>(?:/\g<seg>*+)?)
    | (?<path-absolute>/(?:(?!/)\g<seg>++)?)
    | (?<path-rootless>(?!/)\g<seg>++)
    | (?<path-empty>)
    )
    (?:\?(?<query>[^\#]*+))?
    (?:\#(?<fragment>#{FRAGMENT}))?
  )\z]x
|
||||
|
||||
# Whole-string pattern for a relative reference (no scheme):
#   relative-part [ "?" query ] [ "#" fragment ]
#
# Named groups: relative-ref, relative-part, authority (with the groups that
# AUTHORITY defines), path-abempty, path-absolute, path-noscheme,
# path-empty, query, fragment.
#
# (?<seg>...){0} only declares the "seg" subpattern (zero repetitions, so it
# consumes nothing); the path alternatives invoke it with \g<seg>.
#
# Because SEG also matches "/" and ":", the path alternatives need guards:
# * path-absolute is "/" optionally followed by a run that does not start
#   with "/", so "//..." can only be read as an authority.
# * path-noscheme takes a non-empty first segment made of SEG characters
#   other than ":" and "/" (a colon there would read as a scheme
#   delimiter), then optionally "/" and any further SEG characters.
RFC3986_relative_ref = %r[\A
  (?<seg>#{SEG}){0}
  (?<relative-ref>
    (?<relative-part>//
      (?<authority>#{AUTHORITY})
      (?<path-abempty>(?:/\g<seg>*+)?)
    | (?<path-absolute>/(?:(?!/)\g<seg>++)?)
    | (?<path-noscheme>(?:(?![:/])\g<seg>)++(?:/\g<seg>*+)?)
    | (?<path-empty>)
    )
    (?:\?(?<query>[^\#]*+))?
    (?:\#(?<fragment>#{FRAGMENT}))?
  )\z]x
|
||||
attr_reader :regexp
|
||||
|
||||
def initialize
|
||||
|
@ -92,14 +152,14 @@ module URI
|
|||
|
||||
# Returns a Hash mapping component names (Symbols) to whole-string
# (\A ... \z) validation patterns.
#
# Most entries are assembled from the class-level pattern constants; the
# "o" flag makes each interpolated literal get built only once.
# REL_PATH uses a negative lookahead, (?!/), because SEG itself matches "/"
# and a relative path must not begin with one.
def default_regexp # :nodoc:
  {
    SCHEME: %r[\A#{SCHEME}\z]o,
    USERINFO: %r[\A#{USERINFO}\z]o,
    HOST: %r[\A#{HOST}\z]o,
    ABS_PATH: %r[\A/#{SEG}*+\z]o,
    REL_PATH: %r[\A(?!/)#{SEG}++\z]o,
    QUERY: %r[\A(?:%\h\h|[!$&-.0-9:;=@A-Z_a-z~/?])*+\z],
    FRAGMENT: %r[\A#{FRAGMENT}\z]o,
    OPAQUE: %r[\A(?:[^/].*)?\z],
    # Optional digits, with tab / LF / FF / CR / space allowed on either side.
    PORT: /\A[\x09\x0a\x0c\x0d ]*\d*[\x09\x0a\x0c\x0d ]*\z/,
  }
end
|
||||
|
|
Загрузка…
Ссылка в новой задаче