remove code duplication and put everything into forward order

In file regparse.c, in function node_extended_grapheme_cluster(),
eliminate code duplication of CRLF and '.' (any character). This
uses the fact that both for Unicode encodings and for non-Unicode
encodings, the first alternative is CRLF, and the last alternative
is '.' (any character). This puts all of the pieces into forward
order (the order of the code follows the order of the syntax
definition).

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66267 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
duerst 2018-12-07 07:04:00 +00:00
Родитель 15a7ddf543
Коммит 7780553c30
1 изменённых файлов: 180 добавлений и 195 удалений

Просмотреть файл

@ -5807,8 +5807,8 @@ create_node_from_array(int kind, Node **np, Node **node_array)
* *
* Target Array name Index * Target Array name Index
* *
* node_array 0 1 2 3 4 5 6 7 8 9 A B C D E * node_array 0 1 2 3 4 5 6 7 8 9 A B C D E F
* top_alts alts[4] 0 1 2 3* * top_alts alts[5] 0 1 2 3 4*
* alts+1 list[4] 0 1 2 3* * alts+1 list[4] 0 1 2 3*
* list+1 core_alts[7] 0 1 2 3 4 5 6* * list+1 core_alts[7] 0 1 2 3 4 5 6*
* core_alts+0 H_list[4] 0 1 2 3* * core_alts+0 H_list[4] 0 1 2 3*
@ -5817,7 +5817,7 @@ create_node_from_array(int kind, Node **np, Node **node_array)
* core_alts+4 XP_list[4] 0 1 2 3* * core_alts+4 XP_list[4] 0 1 2 3*
* XP_list+1 Ex_list[4] 0 1 2 3* * XP_list+1 Ex_list[4] 0 1 2 3*
*/ */
#define NODE_COMMON_SIZE 15 #define NODE_COMMON_SIZE 16
static int static int
node_extended_grapheme_cluster(Node** np, ScanEnv* env) node_extended_grapheme_cluster(Node** np, ScanEnv* env)
@ -5828,32 +5828,42 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
int r = 0; int r = 0;
int num1; int num1;
int i; int i;
int any_target_position;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2]; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
OnigOptionType option; OnigOptionType option;
/* node_array is function-global so that we can free all nodes /* node_common is function-global so that we can free all nodes
* in case of error. Unused slots are set to NULL_NODE at all times. */ * in case of error. Unused slots are set to NULL_NODE at all times. */
Node *node_common[NODE_COMMON_SIZE]; Node *node_common[NODE_COMMON_SIZE];
Node **alts = node_common+0; /* size: 5 */
for (i=0; i<NODE_COMMON_SIZE; i++)
node_common[i] = NULL_NODE;
/* CRLF, common for both Unicode and non-Unicode */
/* \x0D\x0A */
r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
if (r < 0) goto err;
num1 = r;
r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
if (r < 0) goto err;
alts[0] = node_new_str_raw(buf, buf + num1 + r);
if (IS_NULL(alts[0])) goto err;
#ifdef USE_UNICODE_PROPERTIES #ifdef USE_UNICODE_PROPERTIES
if (ONIGENC_IS_UNICODE(env->enc)) { /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */ if (ONIGENC_IS_UNICODE(env->enc)) { /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
CClassNode* cc; CClassNode* cc;
for (i=0; i<NODE_COMMON_SIZE; i++)
node_common[i] = NULL_NODE;
if (propname2ctype(env, "Grapheme_Cluster_Break=Extend") < 0) goto err; if (propname2ctype(env, "Grapheme_Cluster_Break=Extend") < 0) goto err;
/* Unicode 11.0.0 /* Unicode 11.0.0
* CRLF (this is added last because it is common with non-Unicode encodings) * CRLF (already done)
* | [Control CR LF] * | [Control CR LF]
* | precore* core postcore* * | precore* core postcore*
* | . (to catch invalid stuff, because this seems to be spec for String#grapheme_clusters) */ * | . (to catch invalid stuff, because this seems to be spec for String#grapheme_clusters) */
{
Node **alts = node_common+0; /* size: 4 */
/* [Control CR LF] (CR and LF are not in the spec, but this is a conformed fix) */ /* [Control CR LF] (CR and LF are not in the spec, but this is a conformed fix) */
alts[0] = node_new_cclass(); alts[1] = node_new_cclass();
if (IS_NULL(alts[0])) goto err; if (IS_NULL(alts[1])) goto err;
cc = NCCLASS(alts[0]); cc = NCCLASS(alts[1]);
R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env)); R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));
if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */ if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */
R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */ R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */
@ -5866,7 +5876,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
/* precore* core postcore* */ /* precore* core postcore* */
{ {
Node **list = alts + 2; /* size: 4 */ Node **list = alts + 3; /* size: 4 */
/* precore*; precore := Prepend */ /* precore*; precore := Prepend */
R_ERR(quantify_property_node(list+0, env, "Grapheme_Cluster_Break=Prepend", '*')); R_ERR(quantify_property_node(list+0, env, "Grapheme_Cluster_Break=Prepend", '*'));
@ -5927,7 +5937,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
/* (Extend* ZWJ \p{Extended_Pictographic})* */ /* (Extend* ZWJ \p{Extended_Pictographic})* */
{ {
Node **Ex_list = XP_list + 2; /* size: 4 */ Node **Ex_list = XP_list + 2; /* size: 4 */
/* assert(Ex_list+4 <= node_common+NODE_COMMON_SIZE) */ if (!(Ex_list+4 == node_common+NODE_COMMON_SIZE)) exit(1);
R_ERR(quantify_property_node(Ex_list+0, env, "Grapheme_Cluster_Break=Extend", '*')); R_ERR(quantify_property_node(Ex_list+0, env, "Grapheme_Cluster_Break=Extend", '*'));
/* ZWJ (ZERO WIDTH JOINER) */ /* ZWJ (ZERO WIDTH JOINER) */
@ -5975,10 +5985,18 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
R_ERR(add_code_range(&(cc->mbuf), env, 0x200D, 0x200D)); R_ERR(add_code_range(&(cc->mbuf), env, 0x200D, 0x200D));
R_ERR(quantify_node(list+2, 0, REPEAT_INFINITE)); R_ERR(quantify_node(list+2, 0, REPEAT_INFINITE));
R_ERR(create_node_from_array(LIST, alts+1, list)); R_ERR(create_node_from_array(LIST, alts+2, list));
} }
/* PerlSyntax: (?s:.), RubySyntax: (?m:.) */ any_target_position = 3;
}
else
#endif /* USE_UNICODE_PROPERTIES */
{
any_target_position = 1;
}
/* PerlSyntax: (?s:.), RubySyntax: (?m:.), common for both Unicode and non-Unicode */
/* Not in Unicode spec (UAX #29), but added to catch invalid stuff, /* Not in Unicode spec (UAX #29), but added to catch invalid stuff,
* because this is Ruby spec for String#grapheme_clusters. */ * because this is Ruby spec for String#grapheme_clusters. */
np1 = node_new_anychar(); np1 = node_new_anychar();
@ -5989,47 +6007,14 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
tmp = node_new_option(option); tmp = node_new_option(option);
if (IS_NULL(tmp)) goto err; if (IS_NULL(tmp)) goto err;
NENCLOSE(tmp)->target = np1; NENCLOSE(tmp)->target = np1;
alts[2] = tmp; alts[any_target_position] = tmp;
np1 = NULL;
R_ERR(create_node_from_array(ALT, &top_alt, alts)); R_ERR(create_node_from_array(ALT, &top_alt, alts));
}
}
else
#endif /* USE_UNICODE_PROPERTIES */
{
/* PerlSyntax: (?s:.), RubySyntax: (?m:.) */
np1 = node_new_anychar();
if (IS_NULL(np1)) goto err;
option = env->option; /* (?>): For efficiency, because there is no text piece
ONOFF(option, ONIG_OPTION_MULTILINE, 0); * that is not in a grapheme cluster, and there is only one way
tmp = node_new_option(option); * to split a string into grapheme clusters. */
if (IS_NULL(tmp)) goto err;
NENCLOSE(tmp)->target = np1;
np1 = tmp;
top_alt = onig_node_new_alt(np1, NULL_NODE);
if (IS_NULL(top_alt)) goto err;
np1 = NULL;
}
/* add in CRLF to complete (CRLF | Control | precore* core postcore* | .) */
/* \x0D\x0A */
r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
if (r < 0) goto err;
num1 = r;
r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
if (r < 0) goto err;
np1 = node_new_str_raw(buf, buf + num1 + r);
if (IS_NULL(np1)) goto err;
tmp = onig_node_new_alt(np1, top_alt);
if (IS_NULL(tmp)) goto err;
top_alt = tmp;
np1 = NULL;
/* (?>): For efficiency, because there is nothing that isn't in a grapheme cluster,
and there is only one way to split a string into grapheme clusters. */
tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK); tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
if (IS_NULL(tmp)) goto err; if (IS_NULL(tmp)) goto err;
NENCLOSE(tmp)->target = top_alt; NENCLOSE(tmp)->target = top_alt;