зеркало из https://github.com/github/ruby.git
[Bug #19728] Auto-generate unicode property docs
https://bugs.ruby-lang.org/issues/19728
This commit is contained in:
Родитель
3fd1968d6f
Коммит
08b3fb1152
16
common.mk
16
common.mk
|
@ -430,7 +430,7 @@ ruby.imp: $(COMMONOBJS)
|
|||
sort -u -o $@
|
||||
|
||||
install: install-$(INSTALLDOC)
|
||||
docs: $(DOCTARGETS)
|
||||
docs: srcs-doc $(DOCTARGETS)
|
||||
pkgconfig-data: $(ruby_pc)
|
||||
$(ruby_pc): $(srcdir)/template/ruby.pc.in config.status
|
||||
|
||||
|
@ -624,15 +624,15 @@ do-install-dbg: $(PROGRAM) pre-install-dbg
|
|||
post-install-dbg::
|
||||
@$(NULLCMD)
|
||||
|
||||
rdoc: PHONY main
|
||||
rdoc: PHONY main srcs-doc
|
||||
@echo Generating RDoc documentation
|
||||
$(Q) $(RDOC) --ri --op "$(RDOCOUT)" $(RDOC_GEN_OPTS) $(RDOCFLAGS) "$(srcdir)"
|
||||
|
||||
html: PHONY main
|
||||
html: PHONY main srcs-doc
|
||||
@echo Generating RDoc HTML files
|
||||
$(Q) $(RDOC) --op "$(HTMLOUT)" $(RDOC_GEN_OPTS) $(RDOCFLAGS) "$(srcdir)"
|
||||
|
||||
rdoc-coverage: PHONY main
|
||||
rdoc-coverage: PHONY main srcs-doc
|
||||
@echo Generating RDoc coverage report
|
||||
$(Q) $(RDOC) --quiet -C $(RDOCFLAGS) "$(srcdir)"
|
||||
|
||||
|
@ -1142,7 +1142,7 @@ common-srcs: $(srcs_vpath)parse.c $(srcs_vpath)lex.c $(srcs_vpath)enc/trans/newl
|
|||
|
||||
missing-srcs: $(srcdir)/missing/des_tables.c
|
||||
|
||||
srcs: common-srcs missing-srcs srcs-enc
|
||||
srcs: common-srcs missing-srcs srcs-enc srcs-doc
|
||||
|
||||
RIPPER_SRCS = $(srcdir)/ext/ripper/ripper.c \
|
||||
$(srcdir)/ext/ripper/ripper_init.c \
|
||||
|
@ -1730,6 +1730,12 @@ $(UNICODE_HDR_DIR)/name2ctype.h:
|
|||
$(UNICODE_SRC_DATA_DIR) $(UNICODE_SRC_EMOJI_DATA_DIR) > $@.new
|
||||
$(MV) $@.new $@
|
||||
|
||||
srcs-doc: $(srcdir)/doc/regexp/unicode_properties.rdoc
|
||||
$(srcdir)/doc/regexp/unicode_properties.rdoc: $(UNICODE_HDR_DIR)/name2ctype.h $(UNICODE_PROPERTY_FILES)
|
||||
$(Q) $(BOOTSTRAPRUBY) $(tooldir)/generic_erb.rb -c -o $@ \
|
||||
$(srcdir)/template/unicode_properties.rdoc.tmpl \
|
||||
$(UNICODE_SRC_DATA_DIR) $(UNICODE_HDR_DIR)/name2ctype.h
|
||||
|
||||
# the next non-comment line was:
|
||||
# $(UNICODE_HDR_DIR)/casefold.h: $(tooldir)/enc-case-folding.rb \
|
||||
# but was changed to make sure CI works on systems that don't have gperf
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -5402,7 +5402,7 @@ static const OnigCodePoint CR_ASCII[] = {
|
|||
0x0000, 0x007f,
|
||||
}; /* CR_ASCII */
|
||||
|
||||
/* 'Punct' */
|
||||
/* 'Punct': [[:Punct:]] */
|
||||
static const OnigCodePoint CR_Punct[] = {
|
||||
191,
|
||||
0x0021, 0x0023,
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
== \Regexps Based on Unicode Properties
|
||||
|
||||
The properties shown here are those currently supported in Ruby.
|
||||
Older versions may not support all of these.
|
||||
<%
|
||||
# Generate a documentation file for the unicode properties.
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# Get PropertyAliases.txt, PropertyValueAliases.txt from unicode.org
|
||||
# (http://unicode.org/Public/UNIDATA/) and run
|
||||
# ```
|
||||
# ruby tool/generic_erb.rb template/unicode_properties.rdoc.tmpl data_dir name2ctype.h
|
||||
# ```
|
||||
|
||||
# Directory holding PropertyAliases.txt and PropertyValueAliases.txt; it is
# taken from the first remaining command-line argument.
#
# Abort with the usage message when the argument is missing or does not name
# an existing directory. Object#tap always returns its receiver, so the former
# `ARGV.shift&.tap { |d| Dir.exist?(d) }` never rejected a bad path and the
# failure only surfaced later as an Errno::ENOENT from File.binread.
data_dir = ARGV.shift
unless data_dir && Dir.exist?(data_dir)
  abort("Usage: #{$0} data_directory [name2ctype.h]")
end

# Map group names, given as last argument to #make_const in enc-unicode.rb,
# to sections in the doc. The order in this hash controls the order in the doc.
# Keys are compared with `===`, so a Regexp key matches any group name it
# matches and a String key must equal the group name exactly.
map = {
  /\[\[:/            => 'POSIX brackets',
  '-'                => 'Special',
  /.+ Category/      => 'Major and General Categories',
  'Binary Property'  => 'Prop List',
  /Derived Property/ => 'Derived Core Properties',
  'Script'           => 'Scripts',
  'Block'            => 'Blocks',
  'Emoji'            => 'Emoji',
  /Grapheme/         => 'Graphemes',
  /Derived Age/      => 'Derived Ages',
}

# aliases in the form { short => long }, e.g. { 'Hex' => 'Hex_Digit', 'L' => 'Letter' }
# Only the `gc` and `sc` rows of PropertyValueAliases.txt are used.
aliases = (
  File.binread(File.join(data_dir, 'PropertyAliases.txt')).scan(/^(\w+)\s*; (\w+)/) +
  File.binread(File.join(data_dir, 'PropertyValueAliases.txt')).scan(/^(?:gc|sc)\s*; (\w+)\s*; (\w+)/)
).to_h

# { section title => [long property name, ...] }
props_by_section = {}

# Read the remaining input (name2ctype.h when given as an argument) and pick
# up the marker comments of the form
#   /* 'Prop': group name */
# The regexp literal must stay on the left-hand side of `=~` so that the named
# captures are assigned to the local variables `prop` and `name`.
ARGF.each_line do |line|
  next unless /'(?<prop>[^']+)': (?<name>.+) \*/ =~ line
  next if prop == 'NEWLINE' # ignore custom internal prop

  section = map.find { |k, _v| k === name }&.last
  unless section
    # Groups without a doc section are reported and left out of the doc;
    # previously they were collected under a nil key that was never rendered.
    warn("no doc section for #{name}")
    next
  end

  # normalize prop names - the header file uses a mix of short and long names
  long_prop_name = aliases[prop] || prop
  (props_by_section[section] ||= []) << long_prop_name
end
|
||||
|
||||
map.each_value do |section| -%>
|
||||
|
||||
=== <%=section%>
|
||||
|
||||
% props_by_section[section].sort.each do |prop|
|
||||
- <%= [prop, aliases.key(prop)].compact.uniq.map { |v| "<tt>\\p{#{v}}</tt>" }.join(', ') %>
|
||||
% end
|
||||
% end
|
|
@ -269,23 +269,12 @@ def parse_block(data)
|
|||
blocks << constname
|
||||
end
|
||||
|
||||
# shim for Ruby 1.8
|
||||
unless {}.respond_to?(:key)
|
||||
class Hash
|
||||
alias key index
|
||||
end
|
||||
end
|
||||
|
||||
$const_cache = {}
|
||||
# make_const(property, pairs, name): Prints a 'static const' structure for a
|
||||
# given property, group of paired codepoints, and a human-friendly name for
|
||||
# the group
|
||||
def make_const(prop, data, name)
|
||||
if name.empty?
|
||||
puts "\n/* '#{prop}' */"
|
||||
else
|
||||
puts "\n/* '#{prop}': #{name} */"
|
||||
end
|
||||
puts "\n/* '#{prop}': #{name} */" # comment used to generate documentation
|
||||
if origprop = $const_cache.key(data)
|
||||
puts "#define CR_#{prop} CR_#{origprop}"
|
||||
else
|
||||
|
@ -437,8 +426,6 @@ define_posix_props(data)
|
|||
POSIX_NAMES.each do |name|
|
||||
if name == 'XPosixPunct'
|
||||
make_const(name, data[name], "[[:Punct:]]")
|
||||
elsif name == 'Punct'
|
||||
make_const(name, data[name], "")
|
||||
else
|
||||
make_const(name, data[name], "[[:#{name}:]]")
|
||||
end
|
||||
|
|
Загрузка…
Ссылка в новой задаче