1660 строки
51 KiB
Diff
1660 строки
51 KiB
Diff
From 84ff1825dd82e8de45020e3def34d1430d8e5a99 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sat, 27 May 2023 16:16:58 +0200
|
|
Subject: [PATCH 02/19] awk: fix splitting with default FS
|
|
|
|
function old new delta
|
|
awk_split 543 544 +1
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 13 ++++++++-----
|
|
1 file changed, 8 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index 2af823808..b3748b502 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -2049,13 +2049,17 @@ static int awk_split(const char *s, node *spl, char **slist)
|
|
}
|
|
return n;
|
|
}
|
|
- /* space split */
|
|
+ /* space split: "In the special case that FS is a single space,
|
|
+ * fields are separated by runs of spaces and/or tabs and/or newlines"
|
|
+ */
|
|
while (*s) {
|
|
- s = skip_whitespace(s);
|
|
+ /* s = skip_whitespace(s); -- WRONG (also skips \v \f \r) */
|
|
+ while (*s == ' ' || *s == '\t' || *s == '\n')
|
|
+ s++;
|
|
if (!*s)
|
|
break;
|
|
n++;
|
|
- while (*s && !isspace(*s))
|
|
+ while (*s && !(*s == ' ' || *s == '\t' || *s == '\n'))
|
|
*s1++ = *s++;
|
|
*s1++ = '\0';
|
|
}
|
|
@@ -2304,7 +2308,6 @@ static int awk_getline(rstream *rsm, var *v)
|
|
setvar_i(intvar[ERRNO], errno);
|
|
}
|
|
b[p] = '\0';
|
|
-
|
|
} while (p > pp);
|
|
|
|
if (p == 0) {
|
|
@@ -3145,7 +3148,7 @@ static var *evaluate(node *op, var *res)
|
|
/* make sure that we never return a temp var */
|
|
if (L.v == TMPVAR0)
|
|
L.v = res;
|
|
- /* if source is a temporary string, jusk relink it to dest */
|
|
+ /* if source is a temporary string, just relink it to dest */
|
|
if (R.v == TMPVAR1
|
|
&& !(R.v->type & VF_NUMBER)
|
|
/* Why check !NUMBER? if R.v is a number but has cached R.v->string,
|
|
--
|
|
2.46.0
|
|
|
|
From 528808bcd25f7d237874dc82fad2adcddf354b42 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sat, 27 May 2023 18:05:42 +0200
|
|
Subject: [PATCH 03/19] awk: get rid of one indirection level for iF (input
|
|
file structure)
|
|
|
|
function old new delta
|
|
try_to_assign - 91 +91
|
|
next_input_file 214 216 +2
|
|
awk_main 827 826 -1
|
|
evaluate 3403 3396 -7
|
|
is_assignment 91 - -91
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 1/1 grow/shrink: 1/2 up/down: 93/-99) Total: -6 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 78 +++++++++++++++++++++++++++------------------------
|
|
1 file changed, 41 insertions(+), 37 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index b3748b502..22f52417d 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -546,7 +546,6 @@ struct globals {
|
|
chain beginseq, mainseq, endseq;
|
|
chain *seq;
|
|
node *break_ptr, *continue_ptr;
|
|
- rstream *iF;
|
|
xhash *ahash; /* argument names, used only while parsing function bodies */
|
|
xhash *fnhash; /* function names, used only in parsing stage */
|
|
xhash *vhash; /* variables and arrays */
|
|
@@ -579,11 +578,12 @@ struct globals2 {
|
|
|
|
var *intvar[NUM_INTERNAL_VARS]; /* often used */
|
|
|
|
+ rstream iF;
|
|
+
|
|
/* former statics from various functions */
|
|
char *split_f0__fstrings;
|
|
|
|
- rstream next_input_file__rsm;
|
|
- smallint next_input_file__files_happen;
|
|
+ smallint next_input_file__input_file_seen;
|
|
|
|
smalluint exitcode;
|
|
|
|
@@ -618,7 +618,6 @@ struct globals2 {
|
|
#define seq (G1.seq )
|
|
#define break_ptr (G1.break_ptr )
|
|
#define continue_ptr (G1.continue_ptr)
|
|
-#define iF (G1.iF )
|
|
#define ahash (G1.ahash )
|
|
#define fnhash (G1.fnhash )
|
|
#define vhash (G1.vhash )
|
|
@@ -644,6 +643,7 @@ struct globals2 {
|
|
#define t_string (G.t_string )
|
|
#define t_lineno (G.t_lineno )
|
|
#define intvar (G.intvar )
|
|
+#define iF (G.iF )
|
|
#define fsplitter (G.fsplitter )
|
|
#define rsplitter (G.rsplitter )
|
|
#define g_buf (G.g_buf )
|
|
@@ -2799,7 +2799,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
|
|
|
/* if expr looks like "var=value", perform assignment and return 1,
|
|
* otherwise return 0 */
|
|
-static int is_assignment(const char *expr)
|
|
+static int try_to_assign(const char *expr)
|
|
{
|
|
char *exprc, *val;
|
|
|
|
@@ -2819,39 +2819,44 @@ static int is_assignment(const char *expr)
|
|
}
|
|
|
|
/* switch to next input file */
|
|
-static rstream *next_input_file(void)
|
|
+static int next_input_file(void)
|
|
{
|
|
-#define rsm (G.next_input_file__rsm)
|
|
-#define files_happen (G.next_input_file__files_happen)
|
|
-
|
|
- const char *fname, *ind;
|
|
+#define input_file_seen (G.next_input_file__input_file_seen)
|
|
+ const char *fname;
|
|
|
|
- if (rsm.F)
|
|
- fclose(rsm.F);
|
|
- rsm.F = NULL;
|
|
- rsm.pos = rsm.adv = 0;
|
|
+ if (iF.F) {
|
|
+ fclose(iF.F);
|
|
+ iF.F = NULL;
|
|
+ iF.pos = iF.adv = 0;
|
|
+ }
|
|
|
|
for (;;) {
|
|
+ const char *ind;
|
|
+
|
|
if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
|
|
- if (files_happen)
|
|
- return NULL;
|
|
+ if (input_file_seen)
|
|
+ return FALSE;
|
|
fname = "-";
|
|
- rsm.F = stdin;
|
|
+ iF.F = stdin;
|
|
break;
|
|
}
|
|
ind = getvar_s(incvar(intvar[ARGIND]));
|
|
fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
|
|
- if (fname && *fname && !is_assignment(fname)) {
|
|
- rsm.F = xfopen_stdin(fname);
|
|
+ if (fname && *fname) {
|
|
+ /* "If a filename on the command line has the form
|
|
+ * var=val it is treated as a variable assignment"
|
|
+ */
|
|
+ if (try_to_assign(fname))
|
|
+ continue;
|
|
+ iF.F = xfopen_stdin(fname);
|
|
break;
|
|
}
|
|
}
|
|
|
|
- files_happen = TRUE;
|
|
setvar_s(intvar[FILENAME], fname);
|
|
- return &rsm;
|
|
-#undef rsm
|
|
-#undef files_happen
|
|
+ input_file_seen = TRUE;
|
|
+ return TRUE;
|
|
+#undef input_file_seen
|
|
}
|
|
|
|
/*
|
|
@@ -3231,12 +3236,12 @@ static var *evaluate(node *op, var *res)
|
|
}
|
|
}
|
|
} else {
|
|
- if (!iF)
|
|
- iF = next_input_file();
|
|
- rsm = iF;
|
|
+ if (!iF.F)
|
|
+ next_input_file();
|
|
+ rsm = &iF;
|
|
}
|
|
|
|
- if (!rsm || !rsm->F) {
|
|
+ if (!rsm->F) {
|
|
setvar_i(intvar[ERRNO], errno);
|
|
setvar_i(res, -1);
|
|
break;
|
|
@@ -3659,7 +3664,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
|
setvar_s(intvar[FS], opt_F);
|
|
}
|
|
while (list_v) {
|
|
- if (!is_assignment(llist_pop(&list_v)))
|
|
+ if (!try_to_assign(llist_pop(&list_v)))
|
|
bb_show_usage();
|
|
}
|
|
|
|
@@ -3718,15 +3723,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
|
awk_exit();
|
|
|
|
/* input file could already be opened in BEGIN block */
|
|
- if (!iF)
|
|
- iF = next_input_file();
|
|
-
|
|
- /* passing through input files */
|
|
- while (iF) {
|
|
+ if (!iF.F)
|
|
+ goto next_file; /* no, it wasn't, go try opening */
|
|
+ /* Iterate over input files */
|
|
+ for (;;) {
|
|
nextfile = FALSE;
|
|
setvar_i(intvar[FNR], 0);
|
|
|
|
- while ((i = awk_getline(iF, intvar[F0])) > 0) {
|
|
+ while ((i = awk_getline(&iF, intvar[F0])) > 0) {
|
|
nextrec = FALSE;
|
|
incvar(intvar[NR]);
|
|
incvar(intvar[FNR]);
|
|
@@ -3735,11 +3739,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
|
if (nextfile)
|
|
break;
|
|
}
|
|
-
|
|
if (i < 0)
|
|
syntax_error(strerror(errno));
|
|
-
|
|
- iF = next_input_file();
|
|
+ next_file:
|
|
+ if (!next_input_file())
|
|
+ break;
|
|
}
|
|
|
|
awk_exit();
|
|
--
|
|
2.46.0
|
|
|
|
From 5c8a9dfd976493e4351abadf6686b621763b564c Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sat, 27 May 2023 18:21:38 +0200
|
|
Subject: [PATCH 04/19] awk: remove a local variable "caching" a struct member
|
|
|
|
Since we take its address, the variable lives on stack (not a GPR).
|
|
Thus, nothing is improved by caching it.
|
|
|
|
function old new delta
|
|
awk_getline 642 639 -3
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 10 ++++------
|
|
1 file changed, 4 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index 22f52417d..4a0eb9281 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -2236,7 +2236,7 @@ static int awk_getline(rstream *rsm, var *v)
|
|
{
|
|
char *b;
|
|
regmatch_t pmatch[1];
|
|
- int size, a, p, pp = 0;
|
|
+ int a, p, pp = 0;
|
|
int fd, so, eo, r, rp;
|
|
char c, *m, *s;
|
|
|
|
@@ -2249,12 +2249,11 @@ static int awk_getline(rstream *rsm, var *v)
|
|
m = rsm->buffer;
|
|
a = rsm->adv;
|
|
p = rsm->pos;
|
|
- size = rsm->size;
|
|
c = (char) rsplitter.n.info;
|
|
rp = 0;
|
|
|
|
if (!m)
|
|
- m = qrealloc(m, 256, &size);
|
|
+ m = qrealloc(m, 256, &rsm->size);
|
|
|
|
do {
|
|
b = m + a;
|
|
@@ -2298,10 +2297,10 @@ static int awk_getline(rstream *rsm, var *v)
|
|
a = 0;
|
|
}
|
|
|
|
- m = qrealloc(m, a+p+128, &size);
|
|
+ m = qrealloc(m, a+p+128, &rsm->size);
|
|
b = m + a;
|
|
pp = p;
|
|
- p += safe_read(fd, b+p, size-p-1);
|
|
+ p += safe_read(fd, b+p, rsm->size - p - 1);
|
|
if (p < pp) {
|
|
p = 0;
|
|
r = 0;
|
|
@@ -2325,7 +2324,6 @@ static int awk_getline(rstream *rsm, var *v)
|
|
rsm->buffer = m;
|
|
rsm->adv = a + eo;
|
|
rsm->pos = p - eo;
|
|
- rsm->size = size;
|
|
|
|
debug_printf_eval("returning from %s(): %d\n", __func__, r);
|
|
|
|
--
|
|
2.46.0
|
|
|
|
From 21dce1c3c3d74a60959b6d8b0c76f38d463b8187 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sat, 27 May 2023 19:11:28 +0200
|
|
Subject: [PATCH 05/19] awk: do not read ARGIND, only set it (gawk compat)
|
|
|
|
function old new delta
|
|
next_input_file 216 243 +27
|
|
evaluate 3396 3402 +6
|
|
awk_main 826 829 +3
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 0/0 grow/shrink: 3/0 up/down: 36/0) Total: 36 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 19 ++++++++++++++-----
|
|
1 file changed, 14 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index 4a0eb9281..77e0b0aab 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -583,6 +583,7 @@ struct globals2 {
|
|
/* former statics from various functions */
|
|
char *split_f0__fstrings;
|
|
|
|
+ unsigned next_input_file__argind;
|
|
smallint next_input_file__input_file_seen;
|
|
|
|
smalluint exitcode;
|
|
@@ -2820,6 +2821,7 @@ static int try_to_assign(const char *expr)
|
|
static int next_input_file(void)
|
|
{
|
|
#define input_file_seen (G.next_input_file__input_file_seen)
|
|
+#define argind (G.next_input_file__argind)
|
|
const char *fname;
|
|
|
|
if (iF.F) {
|
|
@@ -2829,17 +2831,22 @@ static int next_input_file(void)
|
|
}
|
|
|
|
for (;;) {
|
|
- const char *ind;
|
|
-
|
|
- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
|
|
+ /* GNU Awk 5.1.1 does not _read_ ARGIND (but does read ARGC).
|
|
+ * It only sets ARGIND to 1, 2, 3... for every command-line filename
|
|
+ * (VAR=VAL params cause a gap in numbering).
|
|
+ * If there are none and stdin is used, then ARGIND is not modified:
|
|
+ * if it is set by e.g. 'BEGIN { ARGIND="foo" }', that value will
|
|
+ * still be there.
|
|
+ */
|
|
+ argind++;
|
|
+ if (argind >= getvar_i(intvar[ARGC])) {
|
|
if (input_file_seen)
|
|
return FALSE;
|
|
fname = "-";
|
|
iF.F = stdin;
|
|
break;
|
|
}
|
|
- ind = getvar_s(incvar(intvar[ARGIND]));
|
|
- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
|
|
+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), utoa(argind)));
|
|
if (fname && *fname) {
|
|
/* "If a filename on the command line has the form
|
|
* var=val it is treated as a variable assignment"
|
|
@@ -2847,6 +2854,7 @@ static int next_input_file(void)
|
|
if (try_to_assign(fname))
|
|
continue;
|
|
iF.F = xfopen_stdin(fname);
|
|
+ setvar_i(intvar[ARGIND], argind);
|
|
break;
|
|
}
|
|
}
|
|
@@ -2854,6 +2862,7 @@ static int next_input_file(void)
|
|
setvar_s(intvar[FILENAME], fname);
|
|
input_file_seen = TRUE;
|
|
return TRUE;
|
|
+#undef argind
|
|
#undef input_file_seen
|
|
}
|
|
|
|
--
|
|
2.46.0
|
|
|
|
From b76b420b5da1aadad823faf12327b610614f5951 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sun, 28 May 2023 17:25:56 +0200
|
|
Subject: [PATCH 06/19] awk: fix closing of non-opened file
|
|
|
|
function old new delta
|
|
setvar_ERRNO - 53 +53
|
|
.rodata 105252 105246 -6
|
|
awk_getline 639 620 -19
|
|
evaluate 3402 3377 -25
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 1/0 grow/shrink: 0/3 up/down: 53/-50) Total: 3 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 23 +++++++++++++++--------
|
|
1 file changed, 15 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index 77e0b0aab..83a08aa95 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -1006,6 +1006,11 @@ static var *setvar_i(var *v, double value)
|
|
return v;
|
|
}
|
|
|
|
+static void setvar_ERRNO(void)
|
|
+{
|
|
+ setvar_i(intvar[ERRNO], errno);
|
|
+}
|
|
+
|
|
static const char *getvar_s(var *v)
|
|
{
|
|
/* if v is numeric and has no cached string, convert it to string */
|
|
@@ -2305,7 +2310,7 @@ static int awk_getline(rstream *rsm, var *v)
|
|
if (p < pp) {
|
|
p = 0;
|
|
r = 0;
|
|
- setvar_i(intvar[ERRNO], errno);
|
|
+ setvar_ERRNO();
|
|
}
|
|
b[p] = '\0';
|
|
} while (p > pp);
|
|
@@ -3249,7 +3254,7 @@ static var *evaluate(node *op, var *res)
|
|
}
|
|
|
|
if (!rsm->F) {
|
|
- setvar_i(intvar[ERRNO], errno);
|
|
+ setvar_ERRNO();
|
|
setvar_i(res, -1);
|
|
break;
|
|
}
|
|
@@ -3388,16 +3393,18 @@ static var *evaluate(node *op, var *res)
|
|
*/
|
|
if (rsm->F)
|
|
err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
|
|
-//TODO: fix this case:
|
|
-// $ awk 'BEGIN { print close(""); print ERRNO }'
|
|
-// -1
|
|
-// close of redirection that was never opened
|
|
-// (we print 0, 0)
|
|
free(rsm->buffer);
|
|
hash_remove(fdhash, L.s);
|
|
+ } else {
|
|
+ err = -1;
|
|
+ /* gawk 'BEGIN { print close(""); print ERRNO }'
|
|
+ * -1
|
|
+ * close of redirection that was never opened
|
|
+ */
|
|
+ errno = ENOENT;
|
|
}
|
|
if (err)
|
|
- setvar_i(intvar[ERRNO], errno);
|
|
+ setvar_ERRNO();
|
|
R_d = (double)err;
|
|
break;
|
|
}
|
|
--
|
|
2.46.0
|
|
|
|
From 05e60007d42b8e4005085a22e122ef70bf888fa5 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sun, 28 May 2023 17:51:59 +0200
|
|
Subject: [PATCH 07/19] awk: code shrink
|
|
|
|
function old new delta
|
|
awk_getline 620 591 -29
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 47 ++++++++++++++++++++++++-----------------------
|
|
1 file changed, 24 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index 83a08aa95..eb419e063 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -2242,9 +2242,9 @@ static int awk_getline(rstream *rsm, var *v)
|
|
{
|
|
char *b;
|
|
regmatch_t pmatch[1];
|
|
- int a, p, pp = 0;
|
|
- int fd, so, eo, r, rp;
|
|
- char c, *m, *s;
|
|
+ int p, pp;
|
|
+ int fd, so, eo, retval, rp;
|
|
+ char *m, *s;
|
|
|
|
debug_printf_eval("entered %s()\n", __func__);
|
|
|
|
@@ -2253,22 +2253,22 @@ static int awk_getline(rstream *rsm, var *v)
|
|
*/
|
|
fd = fileno(rsm->F);
|
|
m = rsm->buffer;
|
|
- a = rsm->adv;
|
|
- p = rsm->pos;
|
|
- c = (char) rsplitter.n.info;
|
|
- rp = 0;
|
|
-
|
|
if (!m)
|
|
m = qrealloc(m, 256, &rsm->size);
|
|
+ p = rsm->pos;
|
|
+ rp = 0;
|
|
+ pp = 0;
|
|
|
|
do {
|
|
- b = m + a;
|
|
+ b = m + rsm->adv;
|
|
so = eo = p;
|
|
- r = 1;
|
|
+ retval = 1;
|
|
if (p > 0) {
|
|
+ char c = (char) rsplitter.n.info;
|
|
if (rsplitter.n.info == TI_REGEXP) {
|
|
if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
|
|
- b, 1, pmatch, 0) == 0) {
|
|
+ b, 1, pmatch, 0) == 0
|
|
+ ) {
|
|
so = pmatch[0].rm_so;
|
|
eo = pmatch[0].rm_eo;
|
|
if (b[eo] != '\0')
|
|
@@ -2297,43 +2297,44 @@ static int awk_getline(rstream *rsm, var *v)
|
|
}
|
|
}
|
|
|
|
- if (a > 0) {
|
|
- memmove(m, m+a, p+1);
|
|
+ if (rsm->adv > 0) {
|
|
+ memmove(m, m+rsm->adv, p+1);
|
|
b = m;
|
|
- a = 0;
|
|
+ rsm->adv = 0;
|
|
}
|
|
|
|
- m = qrealloc(m, a+p+128, &rsm->size);
|
|
- b = m + a;
|
|
+ b = m = qrealloc(m, p+128, &rsm->size);
|
|
pp = p;
|
|
p += safe_read(fd, b+p, rsm->size - p - 1);
|
|
if (p < pp) {
|
|
p = 0;
|
|
- r = 0;
|
|
+ retval = 0;
|
|
setvar_ERRNO();
|
|
}
|
|
b[p] = '\0';
|
|
} while (p > pp);
|
|
|
|
if (p == 0) {
|
|
- r--;
|
|
+ retval--;
|
|
} else {
|
|
- c = b[so]; b[so] = '\0';
|
|
+ char c = b[so];
|
|
+ b[so] = '\0';
|
|
setvar_s(v, b+rp);
|
|
v->type |= VF_USER;
|
|
b[so] = c;
|
|
- c = b[eo]; b[eo] = '\0';
|
|
+ c = b[eo];
|
|
+ b[eo] = '\0';
|
|
setvar_s(intvar[RT], b+so);
|
|
b[eo] = c;
|
|
}
|
|
|
|
rsm->buffer = m;
|
|
- rsm->adv = a + eo;
|
|
+ rsm->adv += eo;
|
|
rsm->pos = p - eo;
|
|
|
|
- debug_printf_eval("returning from %s(): %d\n", __func__, r);
|
|
+ debug_printf_eval("returning from %s(): %d\n", __func__, retval);
|
|
|
|
- return r;
|
|
+ return retval;
|
|
}
|
|
|
|
/* formatted output into an allocated buffer, return ptr to buffer */
|
|
--
|
|
2.46.0
|
|
|
|
From 4d7339204f9f823f592562d9903db3ae79a6c640 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sun, 28 May 2023 18:00:51 +0200
|
|
Subject: [PATCH 08/19] awk: shrink - use setvar_sn() to set variables from
|
|
non-NUL terminated strings
|
|
|
|
function old new delta
|
|
setvar_sn - 39 +39
|
|
exec_builtin 1145 1136 -9
|
|
awk_getline 591 559 -32
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 1/0 grow/shrink: 0/2 up/down: 39/-41) Total: -2 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 23 +++++++++--------------
|
|
1 file changed, 9 insertions(+), 14 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index eb419e063..b5774a339 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -979,6 +979,11 @@ static var *setvar_s(var *v, const char *value)
|
|
return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
|
|
}
|
|
|
|
+static var *setvar_sn(var *v, const char *value, int len)
|
|
+{
|
|
+ return setvar_p(v, (value && *value && len > 0) ? xstrndup(value, len) : NULL);
|
|
+}
|
|
+
|
|
/* same as setvar_s but sets USER flag */
|
|
static var *setvar_u(var *v, const char *value)
|
|
{
|
|
@@ -2317,15 +2322,9 @@ static int awk_getline(rstream *rsm, var *v)
|
|
if (p == 0) {
|
|
retval--;
|
|
} else {
|
|
- char c = b[so];
|
|
- b[so] = '\0';
|
|
- setvar_s(v, b+rp);
|
|
+ setvar_sn(v, b+rp, so-rp);
|
|
v->type |= VF_USER;
|
|
- b[so] = c;
|
|
- c = b[eo];
|
|
- b[eo] = '\0';
|
|
- setvar_s(intvar[RT], b+so);
|
|
- b[eo] = c;
|
|
+ setvar_sn(intvar[RT], b+so, eo-so);
|
|
}
|
|
|
|
rsm->buffer = m;
|
|
@@ -2677,8 +2676,6 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
|
}
|
|
|
|
case B_ss: {
|
|
- char *s;
|
|
-
|
|
l = strlen(as[0]);
|
|
i = getvar_i(av[1]) - 1;
|
|
if (i > l)
|
|
@@ -2688,8 +2685,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
|
n = (nargs > 2) ? getvar_i(av[2]) : l-i;
|
|
if (n < 0)
|
|
n = 0;
|
|
- s = xstrndup(as[0]+i, n);
|
|
- setvar_p(res, s);
|
|
+ setvar_sn(res, as[0]+i, n);
|
|
break;
|
|
}
|
|
|
|
@@ -2766,8 +2762,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
|
i = strftime(g_buf, MAXVARFMT,
|
|
((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
|
|
localtime(&tt));
|
|
- g_buf[i] = '\0';
|
|
- setvar_s(res, g_buf);
|
|
+ setvar_sn(res, g_buf, i);
|
|
break;
|
|
|
|
case B_mt:
|
|
--
|
|
2.46.0
|
|
|
|
From 721bf6eaf4739a2865b071b38d3478f334234d26 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Mon, 29 May 2023 10:55:40 +0200
|
|
Subject: [PATCH 09/19] awk: printf(INVALID_FMT) prints it verbatim
|
|
|
|
function old new delta
|
|
awk_printf 628 640 +12
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 12 +++++++++---
|
|
1 file changed, 9 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index b5774a339..c49ad6e02 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -2389,7 +2389,7 @@ static char *awk_printf(node *n, size_t *len)
|
|
while (1) {
|
|
if (isalpha(c))
|
|
break;
|
|
- if (c == '*')
|
|
+ if (c == '*') /* gawk supports %*d and %*.*f, we don't... */
|
|
syntax_error("%*x formats are not supported");
|
|
c = *++f;
|
|
if (!c) { /* "....%...." and no letter found after % */
|
|
@@ -2422,12 +2422,18 @@ static char *awk_printf(node *n, size_t *len)
|
|
double d = getvar_i(arg);
|
|
if (strchr("diouxX", c)) {
|
|
//TODO: make it wider here (%x -> %llx etc)?
|
|
+//Can even print the value into a temp string with %.0f,
|
|
+//then replace diouxX with s and print that string.
|
|
+//This will correctly print even very large numbers,
|
|
+//but some replacements are not equivalent:
|
|
+//%09d -> %09s: breaks zero-padding;
|
|
+//%+d -> %+s: won't prepend +; etc
|
|
s = xasprintf(s, (int)d);
|
|
} else if (strchr("eEfFgGaA", c)) {
|
|
s = xasprintf(s, d);
|
|
} else {
|
|
-//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out
|
|
- syntax_error(EMSG_INV_FMT);
|
|
+ /* gawk 5.1.1 printf("%W") prints "%W", does not error out */
|
|
+ s = xstrndup(s, f - s);
|
|
}
|
|
}
|
|
slen = strlen(s);
|
|
--
|
|
2.46.0
|
|
|
|
From 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Tue, 30 May 2023 16:42:18 +0200
|
|
Subject: [PATCH 10/19] awk: fix precedence of = relative to ==
|
|
|
|
Discovered while adding code to disallow assignments to non-lvalues
|
|
|
|
function old new delta
|
|
parse_expr 936 991 +55
|
|
.rodata 105243 105247 +4
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 0/0 grow/shrink: 2/0 up/down: 59/0) Total: 59 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 66 +++++++++++++++++++++++++++++++++++----------------
|
|
1 file changed, 45 insertions(+), 21 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index c49ad6e02..0f062dcdb 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -337,7 +337,9 @@ static void debug_parse_print_tc(uint32_t n)
|
|
#undef P
|
|
#undef PRIMASK
|
|
#undef PRIMASK2
|
|
-#define P(x) (x << 24)
|
|
+/* Smaller 'x' means _higher_ operator precedence */
|
|
+#define PRECEDENCE(x) (x << 24)
|
|
+#define P(x) PRECEDENCE(x)
|
|
#define PRIMASK 0x7F000000
|
|
#define PRIMASK2 0x7E000000
|
|
|
|
@@ -360,7 +362,7 @@ enum {
|
|
OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
|
|
OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
|
|
OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
|
|
- OC_DONE = 0x2800,
|
|
+ OC_CONST = 0x2800, OC_DONE = 0x2900,
|
|
|
|
ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
|
|
ST_WHILE = 0x3300
|
|
@@ -440,9 +442,9 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
|
#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
|
|
#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
|
|
TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
|
|
- OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
|
|
- OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
|
|
- OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
|
|
+ OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-',
|
|
+ OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&',
|
|
+ OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&',
|
|
OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
|
|
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
|
|
#define TI_LESS (OC_COMPARE|VV|P(39)|2)
|
|
@@ -1301,7 +1303,7 @@ static uint32_t next_token(uint32_t expected)
|
|
save_tclass = tc;
|
|
save_info = t_info;
|
|
tc = TC_BINOPX;
|
|
- t_info = OC_CONCAT | SS | P(35);
|
|
+ t_info = OC_CONCAT | SS | PRECEDENCE(35);
|
|
}
|
|
|
|
t_tclass = tc;
|
|
@@ -1361,9 +1363,8 @@ static node *parse_expr(uint32_t term_tc)
|
|
{
|
|
node sn;
|
|
node *cn = &sn;
|
|
- node *vn, *glptr;
|
|
+ node *glptr;
|
|
uint32_t tc, expected_tc;
|
|
- var *v;
|
|
|
|
debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
|
|
debug_parse_print_tc(term_tc);
|
|
@@ -1374,11 +1375,12 @@ static node *parse_expr(uint32_t term_tc)
|
|
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
|
|
|
|
while (!((tc = next_token(expected_tc)) & term_tc)) {
|
|
+ node *vn;
|
|
|
|
if (glptr && (t_info == TI_LESS)) {
|
|
/* input redirection (<) attached to glptr node */
|
|
debug_printf_parse("%s: input redir\n", __func__);
|
|
- cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
|
|
+ cn = glptr->l.n = new_node(OC_CONCAT | SS | PRECEDENCE(37));
|
|
cn->a.n = glptr;
|
|
expected_tc = TS_OPERAND | TS_UOPPRE;
|
|
glptr = NULL;
|
|
@@ -1390,24 +1392,42 @@ static node *parse_expr(uint32_t term_tc)
|
|
* previous operators with higher priority */
|
|
vn = cn;
|
|
while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
|
|
- || ((t_info == vn->info) && t_info == TI_COLON)
|
|
+ || (t_info == vn->info && t_info == TI_COLON)
|
|
) {
|
|
vn = vn->a.n;
|
|
if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
|
|
}
|
|
if (t_info == TI_TERNARY)
|
|
//TODO: why?
|
|
- t_info += P(6);
|
|
+ t_info += PRECEDENCE(6);
|
|
cn = vn->a.n->r.n = new_node(t_info);
|
|
cn->a.n = vn->a.n;
|
|
if (tc & TS_BINOP) {
|
|
cn->l.n = vn;
|
|
-//FIXME: this is the place to detect and reject assignments to non-lvalues.
|
|
-//Currently we allow "assignments" to consts and temporaries, nonsense like this:
|
|
-// awk 'BEGIN { "qwe" = 1 }'
|
|
-// awk 'BEGIN { 7 *= 7 }'
|
|
-// awk 'BEGIN { length("qwe") = 1 }'
|
|
-// awk 'BEGIN { (1+1) += 3 }'
|
|
+
|
|
+ /* Prevent:
|
|
+ * awk 'BEGIN { "qwe" = 1 }'
|
|
+ * awk 'BEGIN { 7 *= 7 }'
|
|
+ * awk 'BEGIN { length("qwe") = 1 }'
|
|
+ * awk 'BEGIN { (1+1) += 3 }'
|
|
+ */
|
|
+ /* Assignment? (including *= and friends) */
|
|
+ if (((t_info & OPCLSMASK) == OC_MOVE)
|
|
+ || ((t_info & OPCLSMASK) == OC_REPLACE)
|
|
+ ) {
|
|
+ debug_printf_parse("%s: MOVE/REPLACE vn->info:%08x\n", __func__, vn->info);
|
|
+ /* Left side is a (variable or array element)
|
|
+ * or function argument
|
|
+ * or $FIELD ?
|
|
+ */
|
|
+ if ((vn->info & OPCLSMASK) != OC_VAR
|
|
+ && (vn->info & OPCLSMASK) != OC_FNARG
|
|
+ && (vn->info & OPCLSMASK) != OC_FIELD
|
|
+ ) {
|
|
+ syntax_error(EMSG_UNEXP_TOKEN); /* no. bad */
|
|
+ }
|
|
+ }
|
|
+
|
|
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
|
|
if (t_info == TI_PGETLINE) {
|
|
/* it's a pipe */
|
|
@@ -1443,6 +1463,8 @@ static node *parse_expr(uint32_t term_tc)
|
|
/* one should be very careful with switch on tclass -
|
|
* only simple tclasses should be used (TC_xyz, not TS_xyz) */
|
|
switch (tc) {
|
|
+ var *v;
|
|
+
|
|
case TC_VARIABLE:
|
|
case TC_ARRAY:
|
|
debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
|
|
@@ -1463,14 +1485,14 @@ static node *parse_expr(uint32_t term_tc)
|
|
case TC_NUMBER:
|
|
case TC_STRING:
|
|
debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
|
|
- cn->info = OC_VAR;
|
|
+ cn->info = OC_CONST;
|
|
v = cn->l.v = xzalloc(sizeof(var));
|
|
- if (tc & TC_NUMBER)
|
|
+ if (tc & TC_NUMBER) {
|
|
setvar_i(v, t_double);
|
|
- else {
|
|
+ } else {
|
|
setvar_s(v, t_string);
|
|
- expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
|
|
}
|
|
+ expected_tc &= ~TC_UOPPOST; /* NUM++, "str"++ not allowed */
|
|
break;
|
|
|
|
case TC_REGEXP:
|
|
@@ -3124,6 +3146,8 @@ static var *evaluate(node *op, var *res)
|
|
|
|
/* -- recursive node type -- */
|
|
|
|
+ case XC( OC_CONST ):
|
|
+ debug_printf_eval("CONST ");
|
|
case XC( OC_VAR ):
|
|
debug_printf_eval("VAR\n");
|
|
L.v = op->l.v;
|
|
--
|
|
2.46.0
|
|
|
|
From 5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sat, 3 Jun 2023 00:39:33 +0200
|
|
Subject: [PATCH 11/19] awk: fix backslash handling in sub() builtins
|
|
|
|
function old new delta
|
|
awk_sub 559 544 -15
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 41 +++++++++++++++++++----------------------
|
|
1 file changed, 19 insertions(+), 22 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index 0f062dcdb..f77573806 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -2492,7 +2492,7 @@ static char *awk_printf(node *n, size_t *len)
|
|
* store result into (dest), return number of substitutions.
|
|
* If nm = 0, replace all matches.
|
|
* If src or dst is NULL, use $0.
|
|
- * If subexp != 0, enable subexpression matching (\1-\9).
|
|
+ * If subexp != 0, enable subexpression matching (\0-\9).
|
|
*/
|
|
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
|
|
{
|
|
@@ -2520,35 +2520,32 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
|
|
residx += eo;
|
|
if (++match_no >= nm) {
|
|
const char *s;
|
|
- int nbs;
|
|
+ int bslash;
|
|
|
|
/* replace */
|
|
residx -= (eo - so);
|
|
- nbs = 0;
|
|
+ bslash = 0;
|
|
for (s = repl; *s; s++) {
|
|
- char c = resbuf[residx++] = *s;
|
|
- if (c == '\\') {
|
|
- nbs++;
|
|
- continue;
|
|
+ char c = *s;
|
|
+ if (c == '\\' && s[1]) {
|
|
+ bslash ^= 1;
|
|
+ if (bslash)
|
|
+ continue;
|
|
}
|
|
- if (c == '&' || (subexp && c >= '0' && c <= '9')) {
|
|
- int j;
|
|
- residx -= ((nbs + 3) >> 1);
|
|
- j = 0;
|
|
+ if ((!bslash && c == '&')
|
|
+ || (subexp && bslash && c >= '0' && c <= '9')
|
|
+ ) {
|
|
+ int n, j = 0;
|
|
if (c != '&') {
|
|
j = c - '0';
|
|
- nbs++;
|
|
}
|
|
- if (nbs % 2) {
|
|
- resbuf[residx++] = c;
|
|
- } else {
|
|
- int n = pmatch[j].rm_eo - pmatch[j].rm_so;
|
|
- resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
|
|
- memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
|
|
- residx += n;
|
|
- }
|
|
- }
|
|
- nbs = 0;
|
|
+ n = pmatch[j].rm_eo - pmatch[j].rm_so;
|
|
+ resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
|
|
+ memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
|
|
+ residx += n;
|
|
+ } else
|
|
+ resbuf[residx++] = c;
|
|
+ bslash = 0;
|
|
}
|
|
}
|
|
|
|
--
|
|
2.46.0
|
|
|
|
From f4789164e0716a8b1f98cf4149a3eb2dad485b8b Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Tue, 6 Jun 2023 12:48:11 +0200
|
|
Subject: [PATCH 12/19] awk: code shrink
|
|
|
|
function old new delta
|
|
awk_sub 544 548 +4
|
|
exec_builtin 1136 1130 -6
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-6) Total: -2 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 18 ++++++++++--------
|
|
1 file changed, 10 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index f77573806..b3871ffc5 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -2494,7 +2494,7 @@ static char *awk_printf(node *n, size_t *len)
|
|
* If src or dst is NULL, use $0.
|
|
* If subexp != 0, enable subexpression matching (\0-\9).
|
|
*/
|
|
-static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
|
|
+static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,int subexp*/)
|
|
{
|
|
char *resbuf;
|
|
const char *sp;
|
|
@@ -2502,6 +2502,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
|
|
int regexec_flags;
|
|
regmatch_t pmatch[10];
|
|
regex_t sreg, *regex;
|
|
+ /* True only if called to implement gensub(): */
|
|
+ int subexp = (src != dest);
|
|
|
|
resbuf = NULL;
|
|
residx = 0;
|
|
@@ -2549,7 +2551,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
|
|
}
|
|
}
|
|
|
|
- regexec_flags = REG_NOTBOL;
|
|
sp += eo;
|
|
if (match_no == nm)
|
|
break;
|
|
@@ -2570,6 +2571,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
|
|
sp++;
|
|
residx++;
|
|
}
|
|
+ regexec_flags = REG_NOTBOL;
|
|
}
|
|
|
|
resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
|
|
@@ -2798,16 +2800,16 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
|
res = do_match(an[1], as[0]);
|
|
break;
|
|
|
|
- case B_ge:
|
|
- awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
|
|
+ case B_ge: /* gensub(regex, repl, matchnum, string) */
|
|
+ awk_sub(an[0], as[1], /*matchnum:*/getvar_i(av[2]), /*src:*/av[3], /*dst:*/res/*, TRUE*/);
|
|
break;
|
|
|
|
- case B_gs:
|
|
- setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
|
|
+ case B_gs: /* gsub(regex, repl, string) */
|
|
+ setvar_i(res, awk_sub(an[0], as[1], /*matchnum:all*/0, /*src:*/av[2], /*dst:*/av[2]/*, FALSE*/));
|
|
break;
|
|
|
|
- case B_su:
|
|
- setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
|
|
+ case B_su: /* sub(regex, repl, string) */
|
|
+ setvar_i(res, awk_sub(an[0], as[1], /*matchnum:first*/1, /*src:*/av[2], /*dst:*/av[2]/*, FALSE*/));
|
|
break;
|
|
}
|
|
|
|
--
|
|
2.46.0
|
|
|
|
From 113685fbcd4c3432ec9b640583d50ba8da2102e8 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Wed, 7 Jun 2023 10:54:34 +0200
|
|
Subject: [PATCH 13/19] awk: fix SEGV on read error in -f PROGFILE
|
|
|
|
function old new delta
|
|
awk_main 829 843 +14
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 4 ++--
|
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index b3871ffc5..df9b7fdc9 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -3609,8 +3609,6 @@ static var *evaluate(node *op, var *res)
|
|
#undef sreg
|
|
}
|
|
|
|
-/* -------- main & co. -------- */
|
|
-
|
|
static int awk_exit(void)
|
|
{
|
|
unsigned i;
|
|
@@ -3717,6 +3715,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
|
g_progname = llist_pop(&list_f);
|
|
fd = xopen_stdin(g_progname);
|
|
s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
|
|
+ if (!s)
|
|
+ bb_perror_msg_and_die("read error from '%s'", g_progname);
|
|
close(fd);
|
|
parse_program(s);
|
|
free(s);
|
|
--
|
|
2.46.0
|
|
|
|
From 2ca39ffd447ca874fcea933194829717d5573247 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Thu, 8 Jun 2023 10:42:39 +0200
|
|
Subject: [PATCH 14/19] awk: fix subst code to handle "start of word" pattern
|
|
correctly (needs REG_STARTEND)
|
|
|
|
function old new delta
|
|
awk_sub 637 714 +77
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 49 ++++++++++++++++++++++++++++++++++++-------------
|
|
1 file changed, 36 insertions(+), 13 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index df9b7fdc9..171f0a7ea 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -2504,17 +2504,46 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
|
|
regex_t sreg, *regex;
|
|
/* True only if called to implement gensub(): */
|
|
int subexp = (src != dest);
|
|
-
|
|
+#if defined(REG_STARTEND)
|
|
+ const char *src_string;
|
|
+ size_t src_strlen;
|
|
+ regexec_flags = REG_STARTEND;
|
|
+#else
|
|
+ regexec_flags = 0;
|
|
+#endif
|
|
resbuf = NULL;
|
|
residx = 0;
|
|
match_no = 0;
|
|
- regexec_flags = 0;
|
|
regex = as_regex(rn, &sreg);
|
|
sp = getvar_s(src ? src : intvar[F0]);
|
|
+#if defined(REG_STARTEND)
|
|
+ src_string = sp;
|
|
+ src_strlen = strlen(src_string);
|
|
+#endif
|
|
replen = strlen(repl);
|
|
- while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
|
|
- int so = pmatch[0].rm_so;
|
|
- int eo = pmatch[0].rm_eo;
|
|
+ for (;;) {
|
|
+ int so, eo;
|
|
+
|
|
+#if defined(REG_STARTEND)
|
|
+// REG_STARTEND: "This flag is a BSD extension, not present in POSIX"
|
|
+ size_t start_ofs = sp - src_string;
|
|
+ pmatch[0].rm_so = start_ofs;
|
|
+ pmatch[0].rm_eo = src_strlen;
|
|
+ if (regexec(regex, src_string, 10, pmatch, regexec_flags) != 0)
|
|
+ break;
|
|
+ eo = pmatch[0].rm_eo - start_ofs;
|
|
+ so = pmatch[0].rm_so - start_ofs;
|
|
+#else
|
|
+// BUG:
|
|
+// gsub(/\<b*/,"") on "abc" matches empty string at "a...",
|
|
+// advances sp one char (see "Empty match" comment later) to "bc"
|
|
+// ... and erroneously matches "b" even though it is NOT at the word start.
|
|
+ enum { start_ofs = 0 };
|
|
+ if (regexec(regex, sp, 10, pmatch, regexec_flags) != 0)
|
|
+ break;
|
|
+ so = pmatch[0].rm_so;
|
|
+ eo = pmatch[0].rm_eo;
|
|
+#endif
|
|
|
|
//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
|
|
resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
|
|
@@ -2543,7 +2572,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
|
|
}
|
|
n = pmatch[j].rm_eo - pmatch[j].rm_so;
|
|
resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
|
|
- memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
|
|
+ memcpy(resbuf + residx, sp + pmatch[j].rm_so - start_ofs, n);
|
|
residx += n;
|
|
} else
|
|
resbuf[residx++] = c;
|
|
@@ -2557,12 +2586,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
|
|
if (eo == so) {
|
|
/* Empty match (e.g. "b*" will match anywhere).
|
|
* Advance by one char. */
|
|
-//BUG (bug 1333):
|
|
-//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
|
|
-//... and will erroneously match "b" even though it is NOT at the word start.
|
|
-//we need REG_NOTBOW but it does not exist...
|
|
-//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
|
|
-//it should be able to do it correctly.
|
|
/* Subtle: this is safe only because
|
|
* qrealloc allocated at least one extra byte */
|
|
resbuf[residx] = *sp;
|
|
@@ -2571,7 +2594,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
|
|
sp++;
|
|
residx++;
|
|
}
|
|
- regexec_flags = REG_NOTBOL;
|
|
+ regexec_flags |= REG_NOTBOL;
|
|
}
|
|
|
|
resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
|
|
--
|
|
2.46.0
|
|
|
|
From 5353df91cba7b397b9407701681708d0a1518df6 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Mon, 10 Jul 2023 17:25:21 +0200
|
|
Subject: [PATCH 15/19] Update applet size estimates
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index 171f0a7ea..efdff2778 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -7,7 +7,7 @@
|
|
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
|
|
*/
|
|
//config:config AWK
|
|
-//config: bool "awk (23 kb)"
|
|
+//config: bool "awk (24 kb)"
|
|
//config: default y
|
|
//config: help
|
|
//config: Awk is used as a pattern scanning and processing language.
|
|
--
|
|
2.46.0
|
|
|
|
From 92ab29fcf04bc3ff3d3ad897f1c2463d8b8d1410 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Mon, 2 Oct 2023 15:24:06 +0200
|
|
Subject: [PATCH 16/19] awk: implement -E; do not reorder -f and -e
|
|
|
|
function old new delta
|
|
awk_main 843 891 +48
|
|
next_input_file 243 261 +18
|
|
packed_usage 34631 34638 +7
|
|
.rodata 105391 105390 -1
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 0/0 grow/shrink: 3/1 up/down: 73/-1) Total: 72 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 113 +++++++++++++++++++++++++++++---------------------
|
|
1 file changed, 65 insertions(+), 48 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index efdff2778..bc95c4155 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -40,7 +40,7 @@
|
|
//usage:#define awk_full_usage "\n\n"
|
|
//usage: " -v VAR=VAL Set variable"
|
|
//usage: "\n -F SEP Use SEP as field separator"
|
|
-//usage: "\n -f FILE Read program from FILE"
|
|
+//usage: "\n -f/-E FILE Read program from FILE"
|
|
//usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
|
|
//usage: "\n -e AWK_PROGRAM"
|
|
//usage: )
|
|
@@ -76,8 +76,8 @@
|
|
* 1: -argz
|
|
*/
|
|
#define OPTSTR_AWK "+" \
|
|
- "F:v:*f:*" \
|
|
- IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
|
|
+ "F:v:f:" \
|
|
+ IF_FEATURE_AWK_GNU_EXTENSIONS("e:E:") \
|
|
"W:"
|
|
enum {
|
|
OPTBIT_F, /* define field separator */
|
|
@@ -560,6 +560,7 @@ struct globals {
|
|
var *Fields;
|
|
char *g_pos;
|
|
char g_saved_ch;
|
|
+ smallint got_program;
|
|
smallint icase;
|
|
smallint exiting;
|
|
smallint nextrec;
|
|
@@ -635,6 +636,7 @@ struct globals2 {
|
|
#define Fields (G1.Fields )
|
|
#define g_pos (G1.g_pos )
|
|
#define g_saved_ch (G1.g_saved_ch )
|
|
+#define got_program (G1.got_program )
|
|
#define icase (G1.icase )
|
|
#define exiting (G1.exiting )
|
|
#define nextrec (G1.nextrec )
|
|
@@ -2899,11 +2901,13 @@ static int next_input_file(void)
|
|
}
|
|
fname = getvar_s(findvar(iamarray(intvar[ARGV]), utoa(argind)));
|
|
if (fname && *fname) {
|
|
- /* "If a filename on the command line has the form
|
|
- * var=val it is treated as a variable assignment"
|
|
- */
|
|
- if (try_to_assign(fname))
|
|
- continue;
|
|
+ if (got_program != 2) { /* there was no -E option */
|
|
+ /* "If a filename on the command line has the form
|
|
+ * var=val it is treated as a variable assignment"
|
|
+ */
|
|
+ if (try_to_assign(fname))
|
|
+ continue;
|
|
+ }
|
|
iF.F = xfopen_stdin(fname);
|
|
setvar_i(intvar[ARGIND], argind);
|
|
break;
|
|
@@ -3659,13 +3663,7 @@ static int awk_exit(void)
|
|
int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
|
|
int awk_main(int argc UNUSED_PARAM, char **argv)
|
|
{
|
|
- unsigned opt;
|
|
- char *opt_F;
|
|
- llist_t *list_v = NULL;
|
|
- llist_t *list_f = NULL;
|
|
-#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
|
- llist_t *list_e = NULL;
|
|
-#endif
|
|
+ int ch;
|
|
int i;
|
|
|
|
INIT_G();
|
|
@@ -3714,49 +3712,68 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
|
}
|
|
}
|
|
}
|
|
- opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
|
|
- argv += optind;
|
|
- //argc -= optind;
|
|
- if (opt & OPT_W)
|
|
- bb_simple_error_msg("warning: option -W is ignored");
|
|
- if (opt & OPT_F) {
|
|
- unescape_string_in_place(opt_F);
|
|
- setvar_s(intvar[FS], opt_F);
|
|
- }
|
|
- while (list_v) {
|
|
- if (!try_to_assign(llist_pop(&list_v)))
|
|
- bb_show_usage();
|
|
- }
|
|
|
|
- /* Parse all supplied programs */
|
|
fnhash = hash_init();
|
|
ahash = hash_init();
|
|
- while (list_f) {
|
|
- int fd;
|
|
- char *s;
|
|
|
|
- g_progname = llist_pop(&list_f);
|
|
- fd = xopen_stdin(g_progname);
|
|
- s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
|
|
- if (!s)
|
|
- bb_perror_msg_and_die("read error from '%s'", g_progname);
|
|
- close(fd);
|
|
- parse_program(s);
|
|
- free(s);
|
|
- }
|
|
- g_progname = "cmd. line";
|
|
+ /* Cannot use getopt32: need to preserve order of -e / -f / -E / -i */
|
|
+ while ((ch = getopt(argc, argv, OPTSTR_AWK)) >= 0) {
|
|
+ switch (ch) {
|
|
+ case 'F':
|
|
+ unescape_string_in_place(optarg);
|
|
+ setvar_s(intvar[FS], optarg);
|
|
+ break;
|
|
+ case 'v':
|
|
+ if (!try_to_assign(optarg))
|
|
+ bb_show_usage();
|
|
+ break;
|
|
+//TODO: implement -i LIBRARY, it is easy-ish
|
|
+ case 'E':
|
|
+ case 'f': {
|
|
+ int fd;
|
|
+ char *s;
|
|
+ g_progname = optarg;
|
|
+ fd = xopen_stdin(g_progname);
|
|
+ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
|
|
+ if (!s)
|
|
+ bb_perror_msg_and_die("read error from '%s'", g_progname);
|
|
+ close(fd);
|
|
+ parse_program(s);
|
|
+ free(s);
|
|
+ got_program = 1;
|
|
+ if (ch == 'E') {
|
|
+ got_program = 2;
|
|
+ goto stop_option_parsing;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
|
- while (list_e) {
|
|
- parse_program(llist_pop(&list_e));
|
|
- }
|
|
+ case 'e':
|
|
+ g_progname = "cmd. line";
|
|
+ parse_program(optarg);
|
|
+ got_program = 1;
|
|
+ break;
|
|
#endif
|
|
-//FIXME: preserve order of -e and -f
|
|
-//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
|
|
- if (!(opt & (OPT_f | OPT_e))) {
|
|
+ case 'W':
|
|
+ bb_simple_error_msg("warning: option -W is ignored");
|
|
+ break;
|
|
+ default:
|
|
+//bb_error_msg("ch:%d", ch);
|
|
+ bb_show_usage();
|
|
+ }
|
|
+ }
|
|
+ stop_option_parsing:
|
|
+
|
|
+ argv += optind;
|
|
+ //argc -= optind;
|
|
+
|
|
+ if (!got_program) {
|
|
if (!*argv)
|
|
bb_show_usage();
|
|
+ g_progname = "cmd. line";
|
|
parse_program(*argv++);
|
|
}
|
|
+
|
|
/* Free unused parse structures */
|
|
//hash_free(fnhash); // ~250 bytes when empty, used only for function names
|
|
//^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
|
|
--
|
|
2.46.0
|
|
|
|
From 789ccac7d9d1a9e433570ac9628992a01f946643 Mon Sep 17 00:00:00 2001
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Date: Sun, 31 Dec 2023 15:49:54 +0100
|
|
Subject: [PATCH 17/19] awk: fix handling of empty fields
|
|
|
|
Patch by M Rubon <rubonmtz@gmail.com>:
|
|
Busybox awk handles references to empty (not provided in the input)
|
|
fields differently during the first line of input, as compared to
|
|
subsequent lines.
|
|
|
|
$ (echo a ; echo b) | awk '$2 != 0' #wrong
|
|
b
|
|
|
|
No field $2 value is provided in the input. When awk references field
|
|
$2 for the "a" line, it is seen to have a different behaviour than
|
|
when it is referenced for the "b" line.
|
|
|
|
Problem in BusyBox v1.36.1 embedded in OpenWrt 23.05.0
|
|
Same problem also in 21.02 versions of OpenWrt
|
|
Same problem in BusyBox v1.37.0.git
|
|
|
|
I get the correct expected output from Ubuntu gawk and Debian mawk,
|
|
and from my fix.
|
|
will@dev:~$ (echo a ; echo b) | awk '$2 != 0' #correct
|
|
a
|
|
b
|
|
will@dev:~/busybox$ (echo a ; echo b ) | ./busybox awk '$2 != 0' #fixed
|
|
a
|
|
b
|
|
|
|
I built and poked into the source code at editors/awk.c. The function
|
|
fsrealloc(int size) is core to allocating, initializing, reallocating,
|
|
and reinitializing fields, both real input line fields and imaginary
|
|
fields that the script references but do not exist in the input.
|
|
|
|
When fsrealloc() needs more field space than it has previously
|
|
allocated, it initializes those new fields differently than how they
|
|
are later reinitialized for the next input line. This works fine for
|
|
fields defined in the input, like $1, but does not work the first time
|
|
when there is no input for that field (e.g. field $99)
|
|
|
|
My one-line fix simply makes the initialization and clrvar()
|
|
reinitialization use the same value for .type. I am not sure if there
|
|
are regression tests to run, but I have not done those.
|
|
|
|
I'm not sure if I understand why clrvar() is not setting .type to a
|
|
default constant value, but in any case I have left that untouched.
|
|
|
|
function old new delta
|
|
------------------------------------------------------------------------------
|
|
(add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0) Total: 0 bytes
|
|
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 33 +++++++++++++++++----------------
|
|
1 file changed, 17 insertions(+), 16 deletions(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index bc95c4155..aa485c782 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -555,8 +555,9 @@ struct globals {
|
|
//we are reusing ahash as fdhash, via define (see later)
|
|
const char *g_progname;
|
|
int g_lineno;
|
|
- int nfields;
|
|
- unsigned maxfields;
|
|
+ int num_fields; /* number of existing $N's */
|
|
+ unsigned num_alloc_fields; /* current size of Fields[] */
|
|
+ /* NB: Fields[0] corresponds to $1, not to $0 */
|
|
var *Fields;
|
|
char *g_pos;
|
|
char g_saved_ch;
|
|
@@ -631,8 +632,8 @@ struct globals2 {
|
|
// for fdhash in execution stage.
|
|
#define g_progname (G1.g_progname )
|
|
#define g_lineno (G1.g_lineno )
|
|
-#define nfields (G1.nfields )
|
|
-#define maxfields (G1.maxfields )
|
|
+#define num_fields (G1.num_fields )
|
|
+#define num_alloc_fields (G1.num_alloc_fields)
|
|
#define Fields (G1.Fields )
|
|
#define g_pos (G1.g_pos )
|
|
#define g_saved_ch (G1.g_saved_ch )
|
|
@@ -1966,30 +1967,30 @@ static void fsrealloc(int size)
|
|
{
|
|
int i, newsize;
|
|
|
|
- if ((unsigned)size >= maxfields) {
|
|
+ if ((unsigned)size >= num_alloc_fields) {
|
|
/* Sanity cap, easier than catering for over/underflows */
|
|
if ((unsigned)size > 0xffffff)
|
|
bb_die_memory_exhausted();
|
|
|
|
- i = maxfields;
|
|
- maxfields = size + 16;
|
|
+ i = num_alloc_fields;
|
|
+ num_alloc_fields = size + 16;
|
|
|
|
- newsize = maxfields * sizeof(Fields[0]);
|
|
+ newsize = num_alloc_fields * sizeof(Fields[0]);
|
|
debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
|
|
Fields = xrealloc(Fields, newsize);
|
|
debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
|
|
/* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
|
|
|
|
- for (; i < maxfields; i++) {
|
|
- Fields[i].type = VF_SPECIAL;
|
|
+ for (; i < num_alloc_fields; i++) {
|
|
+ Fields[i].type = VF_SPECIAL | VF_DIRTY;
|
|
Fields[i].string = NULL;
|
|
}
|
|
}
|
|
- /* if size < nfields, clear extra field variables */
|
|
- for (i = size; i < nfields; i++) {
|
|
+ /* if size < num_fields, clear extra field variables */
|
|
+ for (i = size; i < num_fields; i++) {
|
|
clrvar(Fields + i);
|
|
}
|
|
- nfields = size;
|
|
+ num_fields = size;
|
|
}
|
|
|
|
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
|
|
@@ -2126,7 +2127,7 @@ static void split_f0(void)
|
|
/* set NF manually to avoid side effects */
|
|
clrvar(intvar[NF]);
|
|
intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
|
|
- intvar[NF]->number = nfields;
|
|
+ intvar[NF]->number = num_fields;
|
|
#undef fstrings
|
|
}
|
|
|
|
@@ -2976,7 +2977,7 @@ static var *evaluate(node *op, var *res)
|
|
syntax_error(EMSG_TOO_FEW_ARGS);
|
|
L.v = evaluate(op1, TMPVAR0);
|
|
/* Does L.v point to $n variable? */
|
|
- if ((size_t)(L.v - Fields) < maxfields) {
|
|
+ if ((size_t)(L.v - Fields) < num_alloc_fields) {
|
|
/* yes, remember where Fields[] is */
|
|
old_Fields_ptr = Fields;
|
|
}
|
|
@@ -3517,7 +3518,7 @@ static var *evaluate(node *op, var *res)
|
|
res = intvar[F0];
|
|
} else {
|
|
split_f0();
|
|
- if (i > nfields)
|
|
+ if (i > num_fields)
|
|
fsrealloc(i);
|
|
res = &Fields[i - 1];
|
|
}
|
|
--
|
|
2.46.0
|
|
|
|
From e1a68741067167dc4837e0a26d3d5c318a631fc7 Mon Sep 17 00:00:00 2001
|
|
From: Ron Yorston <rmy@pobox.com>
|
|
Date: Fri, 19 Jan 2024 15:41:17 +0000
|
|
Subject: [PATCH 18/19] awk: fix segfault when compiled by clang
|
|
|
|
A 32-bit build of BusyBox using clang segfaulted in the test
|
|
"awk assign while assign". Specifically, on line 7 of the test
|
|
input where the adjustment of the L.v pointer when the Fields
|
|
array was reallocated
|
|
|
|
L.v += Fields - old_Fields_ptr;
|
|
|
|
was out by 4 bytes.
|
|
|
|
Rearrange the code so both gcc and clang generate code that works.
|
|
|
|
Signed-off-by: Ron Yorston <rmy@pobox.com>
|
|
Signed-off-by: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
|
|
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
|
|
---
|
|
editors/awk.c | 2 +-
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
|
index aa485c782..0981c6735 100644
|
|
--- a/editors/awk.c
|
|
+++ b/editors/awk.c
|
|
@@ -3006,7 +3006,7 @@ static var *evaluate(node *op, var *res)
|
|
if (old_Fields_ptr) {
|
|
//if (old_Fields_ptr != Fields)
|
|
// debug_printf_eval("L.v moved\n");
|
|
- L.v += Fields - old_Fields_ptr;
|
|
+ L.v = Fields + (L.v - old_Fields_ptr);
|
|
}
|
|
if (opinfo & OF_STR2) {
|
|
R.s = getvar_s(R.v);
|
|
--
|
|
2.46.0
|