зеркало из https://github.com/github/ruby.git
60 строки
2.0 KiB
Cheetah
60 строки
2.0 KiB
Cheetah
|
== \Regexps Based on Unicode Properties
|
||
|
|
||
|
The properties shown here are those currently supported in Ruby.
|
||
|
Older versions may not support all of these.
|
||
|
<%
|
||
|
# Generate a documentation file for the unicode properties.
|
||
|
#
|
||
|
# Usage:
|
||
|
#
|
||
|
# Get PropertyAliases.txt, PropertyValueAliases.txt from unicode.org
|
||
|
# (http://unicode.org/Public/UNIDATA/) and run
|
||
|
# ```
|
||
|
# ruby tool/generic_erb.rb template/unicode_properties.rdoc.tmpl data_dir name2ctype.h
|
||
|
# ```
|
||
|
|
||
|
# Directory containing PropertyAliases.txt and PropertyValueAliases.txt,
# taken from the first command-line argument. The existence check is done
# explicitly: Object#tap returns its receiver whatever the block yields, so
# a check placed inside a tap block never rejects a bad path.
data_dir = ARGV.shift
abort("Usage: #{$0} data_directory [name2ctype.h]") unless data_dir && Dir.exist?(data_dir)
|
||
|
|
||
|
# Associates each group name (the last argument given to #make_const in
# enc-unicode.rb) with the heading of its section in the generated doc.
# Entries are listed in the order their sections should appear in the doc.
map = [
  [/\[\[:/,            'POSIX brackets'],
  ['-',                'Special'],
  [/.+ Category/,      'Major and General Categories'],
  ['Binary Property',  'Prop List'],
  [/Derived Property/, 'Derived Core Properties'],
  ['Script',           'Scripts'],
  ['Block',            'Blocks'],
  ['Emoji',            'Emoji'],
  [/Grapheme/,         'Graphemes'],
  [/Derived Age/,      'Derived Ages'],
].to_h
|
||
|
|
||
|
# Short-to-long name table, e.g. { 'Hex' => 'Hex_Digit', 'L' => 'Letter' }.
# The first two fields of each PropertyAliases.txt entry supply property
# names; the gc and sc entries of PropertyValueAliases.txt supply value names.
property_pairs = File.binread(File.join(data_dir, 'PropertyAliases.txt'))
                     .scan(/^(\w+)\s*; (\w+)/)
value_pairs = File.binread(File.join(data_dir, 'PropertyValueAliases.txt'))
                  .scan(/^(?:gc|sc)\s*; (\w+)\s*; (\w+)/)
aliases = (property_pairs + value_pairs).to_h
|
||
|
|
||
|
# Group the long names of all properties found in the input (read via ARGF)
# by the doc section that their group name maps to.
props_by_section = {}
ARGF.each_line do |line|
  # Only lines shaped like `'prop': group name *` describe a property.
  next unless /'(?<prop>[^']+)': (?<name>.+) \*/ =~ line
  next if prop == 'NEWLINE' # ignore custom internal prop

  section = map.find { |pattern, _| pattern === name }&.last
  unless section
    # Kernel#warn returns nil, so fall through would file the property under
    # a nil key that is never rendered; report it and skip the line instead.
    warn("no doc section for #{name}")
    next
  end

  # normalize prop names - the header file uses a mix of short and long names
  long_prop_name = aliases[prop] || prop
  (props_by_section[section] ||= []) << long_prop_name
end
|
||
|
|
||
|
map.each_value do |section|
  # A section listed in map may have no properties in the given input, in
  # which case its key is absent from props_by_section; skip it rather than
  # calling #sort on nil.
  props = props_by_section[section]
  next unless props
-%>

=== <%=section%>

% props.sort.each do |prop|
- <%= [prop, aliases.key(prop)].compact.uniq.map { |v| "<tt>\\p{#{v}}</tt>" }.join(', ') %>
% end
% end
|