CBL-Mariner/SPECS/busybox/CVE-2023-42365.patch

1660 lines
51 KiB
Diff

From 84ff1825dd82e8de45020e3def34d1430d8e5a99 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 May 2023 16:16:58 +0200
Subject: [PATCH 02/19] awk: fix splitting with default FS
function old new delta
awk_split 543 544 +1
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 2af823808..b3748b502 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2049,13 +2049,17 @@ static int awk_split(const char *s, node *spl, char **slist)
}
return n;
}
- /* space split */
+ /* space split: "In the special case that FS is a single space,
+ * fields are separated by runs of spaces and/or tabs and/or newlines"
+ */
while (*s) {
- s = skip_whitespace(s);
+ /* s = skip_whitespace(s); -- WRONG (also skips \v \f \r) */
+ while (*s == ' ' || *s == '\t' || *s == '\n')
+ s++;
if (!*s)
break;
n++;
- while (*s && !isspace(*s))
+ while (*s && !(*s == ' ' || *s == '\t' || *s == '\n'))
*s1++ = *s++;
*s1++ = '\0';
}
@@ -2304,7 +2308,6 @@ static int awk_getline(rstream *rsm, var *v)
setvar_i(intvar[ERRNO], errno);
}
b[p] = '\0';
-
} while (p > pp);
if (p == 0) {
@@ -3145,7 +3148,7 @@ static var *evaluate(node *op, var *res)
/* make sure that we never return a temp var */
if (L.v == TMPVAR0)
L.v = res;
- /* if source is a temporary string, jusk relink it to dest */
+ /* if source is a temporary string, just relink it to dest */
if (R.v == TMPVAR1
&& !(R.v->type & VF_NUMBER)
/* Why check !NUMBER? if R.v is a number but has cached R.v->string,
--
2.46.0
From 528808bcd25f7d237874dc82fad2adcddf354b42 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 May 2023 18:05:42 +0200
Subject: [PATCH 03/19] awk: get rid of one indirection level for iF (input
file structure)
function old new delta
try_to_assign - 91 +91
next_input_file 214 216 +2
awk_main 827 826 -1
evaluate 3403 3396 -7
is_assignment 91 - -91
------------------------------------------------------------------------------
(add/remove: 1/1 grow/shrink: 1/2 up/down: 93/-99) Total: -6 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 78 +++++++++++++++++++++++++++------------------------
1 file changed, 41 insertions(+), 37 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index b3748b502..22f52417d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -546,7 +546,6 @@ struct globals {
chain beginseq, mainseq, endseq;
chain *seq;
node *break_ptr, *continue_ptr;
- rstream *iF;
xhash *ahash; /* argument names, used only while parsing function bodies */
xhash *fnhash; /* function names, used only in parsing stage */
xhash *vhash; /* variables and arrays */
@@ -579,11 +578,12 @@ struct globals2 {
var *intvar[NUM_INTERNAL_VARS]; /* often used */
+ rstream iF;
+
/* former statics from various functions */
char *split_f0__fstrings;
- rstream next_input_file__rsm;
- smallint next_input_file__files_happen;
+ smallint next_input_file__input_file_seen;
smalluint exitcode;
@@ -618,7 +618,6 @@ struct globals2 {
#define seq (G1.seq )
#define break_ptr (G1.break_ptr )
#define continue_ptr (G1.continue_ptr)
-#define iF (G1.iF )
#define ahash (G1.ahash )
#define fnhash (G1.fnhash )
#define vhash (G1.vhash )
@@ -644,6 +643,7 @@ struct globals2 {
#define t_string (G.t_string )
#define t_lineno (G.t_lineno )
#define intvar (G.intvar )
+#define iF (G.iF )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
#define g_buf (G.g_buf )
@@ -2799,7 +2799,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
/* if expr looks like "var=value", perform assignment and return 1,
* otherwise return 0 */
-static int is_assignment(const char *expr)
+static int try_to_assign(const char *expr)
{
char *exprc, *val;
@@ -2819,39 +2819,44 @@ static int is_assignment(const char *expr)
}
/* switch to next input file */
-static rstream *next_input_file(void)
+static int next_input_file(void)
{
-#define rsm (G.next_input_file__rsm)
-#define files_happen (G.next_input_file__files_happen)
-
- const char *fname, *ind;
+#define input_file_seen (G.next_input_file__input_file_seen)
+ const char *fname;
- if (rsm.F)
- fclose(rsm.F);
- rsm.F = NULL;
- rsm.pos = rsm.adv = 0;
+ if (iF.F) {
+ fclose(iF.F);
+ iF.F = NULL;
+ iF.pos = iF.adv = 0;
+ }
for (;;) {
+ const char *ind;
+
if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
- if (files_happen)
- return NULL;
+ if (input_file_seen)
+ return FALSE;
fname = "-";
- rsm.F = stdin;
+ iF.F = stdin;
break;
}
ind = getvar_s(incvar(intvar[ARGIND]));
fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
- if (fname && *fname && !is_assignment(fname)) {
- rsm.F = xfopen_stdin(fname);
+ if (fname && *fname) {
+ /* "If a filename on the command line has the form
+ * var=val it is treated as a variable assignment"
+ */
+ if (try_to_assign(fname))
+ continue;
+ iF.F = xfopen_stdin(fname);
break;
}
}
- files_happen = TRUE;
setvar_s(intvar[FILENAME], fname);
- return &rsm;
-#undef rsm
-#undef files_happen
+ input_file_seen = TRUE;
+ return TRUE;
+#undef input_file_seen
}
/*
@@ -3231,12 +3236,12 @@ static var *evaluate(node *op, var *res)
}
}
} else {
- if (!iF)
- iF = next_input_file();
- rsm = iF;
+ if (!iF.F)
+ next_input_file();
+ rsm = &iF;
}
- if (!rsm || !rsm->F) {
+ if (!rsm->F) {
setvar_i(intvar[ERRNO], errno);
setvar_i(res, -1);
break;
@@ -3659,7 +3664,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
setvar_s(intvar[FS], opt_F);
}
while (list_v) {
- if (!is_assignment(llist_pop(&list_v)))
+ if (!try_to_assign(llist_pop(&list_v)))
bb_show_usage();
}
@@ -3718,15 +3723,14 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
awk_exit();
/* input file could already be opened in BEGIN block */
- if (!iF)
- iF = next_input_file();
-
- /* passing through input files */
- while (iF) {
+ if (!iF.F)
+ goto next_file; /* no, it wasn't, go try opening */
+ /* Iterate over input files */
+ for (;;) {
nextfile = FALSE;
setvar_i(intvar[FNR], 0);
- while ((i = awk_getline(iF, intvar[F0])) > 0) {
+ while ((i = awk_getline(&iF, intvar[F0])) > 0) {
nextrec = FALSE;
incvar(intvar[NR]);
incvar(intvar[FNR]);
@@ -3735,11 +3739,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
if (nextfile)
break;
}
-
if (i < 0)
syntax_error(strerror(errno));
-
- iF = next_input_file();
+ next_file:
+ if (!next_input_file())
+ break;
}
awk_exit();
--
2.46.0
From 5c8a9dfd976493e4351abadf6686b621763b564c Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 May 2023 18:21:38 +0200
Subject: [PATCH 04/19] awk: remove a local variable "caching" a struct member
Since we take its address, the variable lives on stack (not a GPR).
Thus, nothing is improved by caching it.
function old new delta
awk_getline 642 639 -3
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 22f52417d..4a0eb9281 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2236,7 +2236,7 @@ static int awk_getline(rstream *rsm, var *v)
{
char *b;
regmatch_t pmatch[1];
- int size, a, p, pp = 0;
+ int a, p, pp = 0;
int fd, so, eo, r, rp;
char c, *m, *s;
@@ -2249,12 +2249,11 @@ static int awk_getline(rstream *rsm, var *v)
m = rsm->buffer;
a = rsm->adv;
p = rsm->pos;
- size = rsm->size;
c = (char) rsplitter.n.info;
rp = 0;
if (!m)
- m = qrealloc(m, 256, &size);
+ m = qrealloc(m, 256, &rsm->size);
do {
b = m + a;
@@ -2298,10 +2297,10 @@ static int awk_getline(rstream *rsm, var *v)
a = 0;
}
- m = qrealloc(m, a+p+128, &size);
+ m = qrealloc(m, a+p+128, &rsm->size);
b = m + a;
pp = p;
- p += safe_read(fd, b+p, size-p-1);
+ p += safe_read(fd, b+p, rsm->size - p - 1);
if (p < pp) {
p = 0;
r = 0;
@@ -2325,7 +2324,6 @@ static int awk_getline(rstream *rsm, var *v)
rsm->buffer = m;
rsm->adv = a + eo;
rsm->pos = p - eo;
- rsm->size = size;
debug_printf_eval("returning from %s(): %d\n", __func__, r);
--
2.46.0
From 21dce1c3c3d74a60959b6d8b0c76f38d463b8187 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 27 May 2023 19:11:28 +0200
Subject: [PATCH 05/19] awk: do not read ARGIND, only set it (gawk compat)
function old new delta
next_input_file 216 243 +27
evaluate 3396 3402 +6
awk_main 826 829 +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 36/0) Total: 36 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 4a0eb9281..77e0b0aab 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -583,6 +583,7 @@ struct globals2 {
/* former statics from various functions */
char *split_f0__fstrings;
+ unsigned next_input_file__argind;
smallint next_input_file__input_file_seen;
smalluint exitcode;
@@ -2820,6 +2821,7 @@ static int try_to_assign(const char *expr)
static int next_input_file(void)
{
#define input_file_seen (G.next_input_file__input_file_seen)
+#define argind (G.next_input_file__argind)
const char *fname;
if (iF.F) {
@@ -2829,17 +2831,22 @@ static int next_input_file(void)
}
for (;;) {
- const char *ind;
-
- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
+ /* GNU Awk 5.1.1 does not _read_ ARGIND (but does read ARGC).
+ * It only sets ARGIND to 1, 2, 3... for every command-line filename
+ * (VAR=VAL params cause a gap in numbering).
+ * If there are none and stdin is used, then ARGIND is not modified:
+ * if it is set by e.g. 'BEGIN { ARGIND="foo" }', that value will
+ * still be there.
+ */
+ argind++;
+ if (argind >= getvar_i(intvar[ARGC])) {
if (input_file_seen)
return FALSE;
fname = "-";
iF.F = stdin;
break;
}
- ind = getvar_s(incvar(intvar[ARGIND]));
- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), utoa(argind)));
if (fname && *fname) {
/* "If a filename on the command line has the form
* var=val it is treated as a variable assignment"
@@ -2847,6 +2854,7 @@ static int next_input_file(void)
if (try_to_assign(fname))
continue;
iF.F = xfopen_stdin(fname);
+ setvar_i(intvar[ARGIND], argind);
break;
}
}
@@ -2854,6 +2862,7 @@ static int next_input_file(void)
setvar_s(intvar[FILENAME], fname);
input_file_seen = TRUE;
return TRUE;
+#undef argind
#undef input_file_seen
}
--
2.46.0
From b76b420b5da1aadad823faf12327b610614f5951 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 May 2023 17:25:56 +0200
Subject: [PATCH 06/19] awk: fix closing of non-opened file
function old new delta
setvar_ERRNO - 53 +53
.rodata 105252 105246 -6
awk_getline 639 620 -19
evaluate 3402 3377 -25
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 53/-50) Total: 3 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 77e0b0aab..83a08aa95 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1006,6 +1006,11 @@ static var *setvar_i(var *v, double value)
return v;
}
+static void setvar_ERRNO(void)
+{
+ setvar_i(intvar[ERRNO], errno);
+}
+
static const char *getvar_s(var *v)
{
/* if v is numeric and has no cached string, convert it to string */
@@ -2305,7 +2310,7 @@ static int awk_getline(rstream *rsm, var *v)
if (p < pp) {
p = 0;
r = 0;
- setvar_i(intvar[ERRNO], errno);
+ setvar_ERRNO();
}
b[p] = '\0';
} while (p > pp);
@@ -3249,7 +3254,7 @@ static var *evaluate(node *op, var *res)
}
if (!rsm->F) {
- setvar_i(intvar[ERRNO], errno);
+ setvar_ERRNO();
setvar_i(res, -1);
break;
}
@@ -3388,16 +3393,18 @@ static var *evaluate(node *op, var *res)
*/
if (rsm->F)
err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
-//TODO: fix this case:
-// $ awk 'BEGIN { print close(""); print ERRNO }'
-// -1
-// close of redirection that was never opened
-// (we print 0, 0)
free(rsm->buffer);
hash_remove(fdhash, L.s);
+ } else {
+ err = -1;
+ /* gawk 'BEGIN { print close(""); print ERRNO }'
+ * -1
+ * close of redirection that was never opened
+ */
+ errno = ENOENT;
}
if (err)
- setvar_i(intvar[ERRNO], errno);
+ setvar_ERRNO();
R_d = (double)err;
break;
}
--
2.46.0
From 05e60007d42b8e4005085a22e122ef70bf888fa5 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 May 2023 17:51:59 +0200
Subject: [PATCH 07/19] awk: code shrink
function old new delta
awk_getline 620 591 -29
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 47 ++++++++++++++++++++++++-----------------------
1 file changed, 24 insertions(+), 23 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 83a08aa95..eb419e063 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2242,9 +2242,9 @@ static int awk_getline(rstream *rsm, var *v)
{
char *b;
regmatch_t pmatch[1];
- int a, p, pp = 0;
- int fd, so, eo, r, rp;
- char c, *m, *s;
+ int p, pp;
+ int fd, so, eo, retval, rp;
+ char *m, *s;
debug_printf_eval("entered %s()\n", __func__);
@@ -2253,22 +2253,22 @@ static int awk_getline(rstream *rsm, var *v)
*/
fd = fileno(rsm->F);
m = rsm->buffer;
- a = rsm->adv;
- p = rsm->pos;
- c = (char) rsplitter.n.info;
- rp = 0;
-
if (!m)
m = qrealloc(m, 256, &rsm->size);
+ p = rsm->pos;
+ rp = 0;
+ pp = 0;
do {
- b = m + a;
+ b = m + rsm->adv;
so = eo = p;
- r = 1;
+ retval = 1;
if (p > 0) {
+ char c = (char) rsplitter.n.info;
if (rsplitter.n.info == TI_REGEXP) {
if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
- b, 1, pmatch, 0) == 0) {
+ b, 1, pmatch, 0) == 0
+ ) {
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
if (b[eo] != '\0')
@@ -2297,43 +2297,44 @@ static int awk_getline(rstream *rsm, var *v)
}
}
- if (a > 0) {
- memmove(m, m+a, p+1);
+ if (rsm->adv > 0) {
+ memmove(m, m+rsm->adv, p+1);
b = m;
- a = 0;
+ rsm->adv = 0;
}
- m = qrealloc(m, a+p+128, &rsm->size);
- b = m + a;
+ b = m = qrealloc(m, p+128, &rsm->size);
pp = p;
p += safe_read(fd, b+p, rsm->size - p - 1);
if (p < pp) {
p = 0;
- r = 0;
+ retval = 0;
setvar_ERRNO();
}
b[p] = '\0';
} while (p > pp);
if (p == 0) {
- r--;
+ retval--;
} else {
- c = b[so]; b[so] = '\0';
+ char c = b[so];
+ b[so] = '\0';
setvar_s(v, b+rp);
v->type |= VF_USER;
b[so] = c;
- c = b[eo]; b[eo] = '\0';
+ c = b[eo];
+ b[eo] = '\0';
setvar_s(intvar[RT], b+so);
b[eo] = c;
}
rsm->buffer = m;
- rsm->adv = a + eo;
+ rsm->adv += eo;
rsm->pos = p - eo;
- debug_printf_eval("returning from %s(): %d\n", __func__, r);
+ debug_printf_eval("returning from %s(): %d\n", __func__, retval);
- return r;
+ return retval;
}
/* formatted output into an allocated buffer, return ptr to buffer */
--
2.46.0
From 4d7339204f9f823f592562d9903db3ae79a6c640 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 28 May 2023 18:00:51 +0200
Subject: [PATCH 08/19] awk: shrink - use setvar_sn() to set variables from
non-NUL terminated strings
function old new delta
setvar_sn - 39 +39
exec_builtin 1145 1136 -9
awk_getline 591 559 -32
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 39/-41) Total: -2 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index eb419e063..b5774a339 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -979,6 +979,11 @@ static var *setvar_s(var *v, const char *value)
return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
}
+static var *setvar_sn(var *v, const char *value, int len)
+{
+ return setvar_p(v, (value && *value && len > 0) ? xstrndup(value, len) : NULL);
+}
+
/* same as setvar_s but sets USER flag */
static var *setvar_u(var *v, const char *value)
{
@@ -2317,15 +2322,9 @@ static int awk_getline(rstream *rsm, var *v)
if (p == 0) {
retval--;
} else {
- char c = b[so];
- b[so] = '\0';
- setvar_s(v, b+rp);
+ setvar_sn(v, b+rp, so-rp);
v->type |= VF_USER;
- b[so] = c;
- c = b[eo];
- b[eo] = '\0';
- setvar_s(intvar[RT], b+so);
- b[eo] = c;
+ setvar_sn(intvar[RT], b+so, eo-so);
}
rsm->buffer = m;
@@ -2677,8 +2676,6 @@ static NOINLINE var *exec_builtin(node *op, var *res)
}
case B_ss: {
- char *s;
-
l = strlen(as[0]);
i = getvar_i(av[1]) - 1;
if (i > l)
@@ -2688,8 +2685,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
n = (nargs > 2) ? getvar_i(av[2]) : l-i;
if (n < 0)
n = 0;
- s = xstrndup(as[0]+i, n);
- setvar_p(res, s);
+ setvar_sn(res, as[0]+i, n);
break;
}
@@ -2766,8 +2762,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
i = strftime(g_buf, MAXVARFMT,
((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
localtime(&tt));
- g_buf[i] = '\0';
- setvar_s(res, g_buf);
+ setvar_sn(res, g_buf, i);
break;
case B_mt:
--
2.46.0
From 721bf6eaf4739a2865b071b38d3478f334234d26 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 29 May 2023 10:55:40 +0200
Subject: [PATCH 09/19] awk: printf(INVALID_FMT) prints it verbatim
function old new delta
awk_printf 628 640 +12
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index b5774a339..c49ad6e02 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2389,7 +2389,7 @@ static char *awk_printf(node *n, size_t *len)
while (1) {
if (isalpha(c))
break;
- if (c == '*')
+ if (c == '*') /* gawk supports %*d and %*.*f, we don't... */
syntax_error("%*x formats are not supported");
c = *++f;
if (!c) { /* "....%...." and no letter found after % */
@@ -2422,12 +2422,18 @@ static char *awk_printf(node *n, size_t *len)
double d = getvar_i(arg);
if (strchr("diouxX", c)) {
//TODO: make it wider here (%x -> %llx etc)?
+//Can even print the value into a temp string with %.0f,
+//then replace diouxX with s and print that string.
+//This will correctly print even very large numbers,
+//but some replacements are not equivalent:
+//%09d -> %09s: breaks zero-padding;
+//%+d -> %+s: won't prepend +; etc
s = xasprintf(s, (int)d);
} else if (strchr("eEfFgGaA", c)) {
s = xasprintf(s, d);
} else {
-//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out
- syntax_error(EMSG_INV_FMT);
+ /* gawk 5.1.1 printf("%W") prints "%W", does not error out */
+ s = xstrndup(s, f - s);
}
}
slen = strlen(s);
--
2.46.0
From 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 30 May 2023 16:42:18 +0200
Subject: [PATCH 10/19] awk: fix precedence of = relative to ==
Discovered while adding code to disallow assignments to non-lvalues
function old new delta
parse_expr 936 991 +55
.rodata 105243 105247 +4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 59/0) Total: 59 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 66 +++++++++++++++++++++++++++++++++++----------------
1 file changed, 45 insertions(+), 21 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index c49ad6e02..0f062dcdb 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -337,7 +337,9 @@ static void debug_parse_print_tc(uint32_t n)
#undef P
#undef PRIMASK
#undef PRIMASK2
-#define P(x) (x << 24)
+/* Smaller 'x' means _higher_ operator precedence */
+#define PRECEDENCE(x) (x << 24)
+#define P(x) PRECEDENCE(x)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000
@@ -360,7 +362,7 @@ enum {
OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
- OC_DONE = 0x2800,
+ OC_CONST = 0x2800, OC_DONE = 0x2900,
ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
ST_WHILE = 0x3300
@@ -440,9 +442,9 @@ static const uint32_t tokeninfo[] ALIGN4 = {
#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
- OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
- OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
- OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
+ OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-',
+ OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&',
+ OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&',
OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
#define TI_LESS (OC_COMPARE|VV|P(39)|2)
@@ -1301,7 +1303,7 @@ static uint32_t next_token(uint32_t expected)
save_tclass = tc;
save_info = t_info;
tc = TC_BINOPX;
- t_info = OC_CONCAT | SS | P(35);
+ t_info = OC_CONCAT | SS | PRECEDENCE(35);
}
t_tclass = tc;
@@ -1361,9 +1363,8 @@ static node *parse_expr(uint32_t term_tc)
{
node sn;
node *cn = &sn;
- node *vn, *glptr;
+ node *glptr;
uint32_t tc, expected_tc;
- var *v;
debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
debug_parse_print_tc(term_tc);
@@ -1374,11 +1375,12 @@ static node *parse_expr(uint32_t term_tc)
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
while (!((tc = next_token(expected_tc)) & term_tc)) {
+ node *vn;
if (glptr && (t_info == TI_LESS)) {
/* input redirection (<) attached to glptr node */
debug_printf_parse("%s: input redir\n", __func__);
- cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
+ cn = glptr->l.n = new_node(OC_CONCAT | SS | PRECEDENCE(37));
cn->a.n = glptr;
expected_tc = TS_OPERAND | TS_UOPPRE;
glptr = NULL;
@@ -1390,24 +1392,42 @@ static node *parse_expr(uint32_t term_tc)
* previous operators with higher priority */
vn = cn;
while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
- || ((t_info == vn->info) && t_info == TI_COLON)
+ || (t_info == vn->info && t_info == TI_COLON)
) {
vn = vn->a.n;
if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
}
if (t_info == TI_TERNARY)
//TODO: why?
- t_info += P(6);
+ t_info += PRECEDENCE(6);
cn = vn->a.n->r.n = new_node(t_info);
cn->a.n = vn->a.n;
if (tc & TS_BINOP) {
cn->l.n = vn;
-//FIXME: this is the place to detect and reject assignments to non-lvalues.
-//Currently we allow "assignments" to consts and temporaries, nonsense like this:
-// awk 'BEGIN { "qwe" = 1 }'
-// awk 'BEGIN { 7 *= 7 }'
-// awk 'BEGIN { length("qwe") = 1 }'
-// awk 'BEGIN { (1+1) += 3 }'
+
+ /* Prevent:
+ * awk 'BEGIN { "qwe" = 1 }'
+ * awk 'BEGIN { 7 *= 7 }'
+ * awk 'BEGIN { length("qwe") = 1 }'
+ * awk 'BEGIN { (1+1) += 3 }'
+ */
+ /* Assignment? (including *= and friends) */
+ if (((t_info & OPCLSMASK) == OC_MOVE)
+ || ((t_info & OPCLSMASK) == OC_REPLACE)
+ ) {
+ debug_printf_parse("%s: MOVE/REPLACE vn->info:%08x\n", __func__, vn->info);
+ /* Left side is a (variable or array element)
+ * or function argument
+ * or $FIELD ?
+ */
+ if ((vn->info & OPCLSMASK) != OC_VAR
+ && (vn->info & OPCLSMASK) != OC_FNARG
+ && (vn->info & OPCLSMASK) != OC_FIELD
+ ) {
+ syntax_error(EMSG_UNEXP_TOKEN); /* no. bad */
+ }
+ }
+
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
if (t_info == TI_PGETLINE) {
/* it's a pipe */
@@ -1443,6 +1463,8 @@ static node *parse_expr(uint32_t term_tc)
/* one should be very careful with switch on tclass -
* only simple tclasses should be used (TC_xyz, not TS_xyz) */
switch (tc) {
+ var *v;
+
case TC_VARIABLE:
case TC_ARRAY:
debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
@@ -1463,14 +1485,14 @@ static node *parse_expr(uint32_t term_tc)
case TC_NUMBER:
case TC_STRING:
debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
- cn->info = OC_VAR;
+ cn->info = OC_CONST;
v = cn->l.v = xzalloc(sizeof(var));
- if (tc & TC_NUMBER)
+ if (tc & TC_NUMBER) {
setvar_i(v, t_double);
- else {
+ } else {
setvar_s(v, t_string);
- expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
}
+ expected_tc &= ~TC_UOPPOST; /* NUM++, "str"++ not allowed */
break;
case TC_REGEXP:
@@ -3124,6 +3146,8 @@ static var *evaluate(node *op, var *res)
/* -- recursive node type -- */
+ case XC( OC_CONST ):
+ debug_printf_eval("CONST ");
case XC( OC_VAR ):
debug_printf_eval("VAR\n");
L.v = op->l.v;
--
2.46.0
From 5f84c5633663f6ee8c9cc3a4608b86d4b56b39d6 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sat, 3 Jun 2023 00:39:33 +0200
Subject: [PATCH 11/19] awk: fix backslash handling in sub() builtins
function old new delta
awk_sub 559 544 -15
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 41 +++++++++++++++++++----------------------
1 file changed, 19 insertions(+), 22 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 0f062dcdb..f77573806 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2492,7 +2492,7 @@ static char *awk_printf(node *n, size_t *len)
* store result into (dest), return number of substitutions.
* If nm = 0, replace all matches.
* If src or dst is NULL, use $0.
- * If subexp != 0, enable subexpression matching (\1-\9).
+ * If subexp != 0, enable subexpression matching (\0-\9).
*/
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
@@ -2520,35 +2520,32 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
residx += eo;
if (++match_no >= nm) {
const char *s;
- int nbs;
+ int bslash;
/* replace */
residx -= (eo - so);
- nbs = 0;
+ bslash = 0;
for (s = repl; *s; s++) {
- char c = resbuf[residx++] = *s;
- if (c == '\\') {
- nbs++;
- continue;
+ char c = *s;
+ if (c == '\\' && s[1]) {
+ bslash ^= 1;
+ if (bslash)
+ continue;
}
- if (c == '&' || (subexp && c >= '0' && c <= '9')) {
- int j;
- residx -= ((nbs + 3) >> 1);
- j = 0;
+ if ((!bslash && c == '&')
+ || (subexp && bslash && c >= '0' && c <= '9')
+ ) {
+ int n, j = 0;
if (c != '&') {
j = c - '0';
- nbs++;
}
- if (nbs % 2) {
- resbuf[residx++] = c;
- } else {
- int n = pmatch[j].rm_eo - pmatch[j].rm_so;
- resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
- memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
- residx += n;
- }
- }
- nbs = 0;
+ n = pmatch[j].rm_eo - pmatch[j].rm_so;
+ resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
+ memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
+ residx += n;
+ } else
+ resbuf[residx++] = c;
+ bslash = 0;
}
}
--
2.46.0
From f4789164e0716a8b1f98cf4149a3eb2dad485b8b Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Tue, 6 Jun 2023 12:48:11 +0200
Subject: [PATCH 12/19] awk: code shrink
function old new delta
awk_sub 544 548 +4
exec_builtin 1136 1130 -6
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-6) Total: -2 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index f77573806..b3871ffc5 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2494,7 +2494,7 @@ static char *awk_printf(node *n, size_t *len)
* If src or dst is NULL, use $0.
* If subexp != 0, enable subexpression matching (\0-\9).
*/
-static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
+static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,int subexp*/)
{
char *resbuf;
const char *sp;
@@ -2502,6 +2502,8 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
int regexec_flags;
regmatch_t pmatch[10];
regex_t sreg, *regex;
+ /* True only if called to implement gensub(): */
+ int subexp = (src != dest);
resbuf = NULL;
residx = 0;
@@ -2549,7 +2551,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
}
}
- regexec_flags = REG_NOTBOL;
sp += eo;
if (match_no == nm)
break;
@@ -2570,6 +2571,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int
sp++;
residx++;
}
+ regexec_flags = REG_NOTBOL;
}
resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
@@ -2798,16 +2800,16 @@ static NOINLINE var *exec_builtin(node *op, var *res)
res = do_match(an[1], as[0]);
break;
- case B_ge:
- awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
+ case B_ge: /* gensub(regex, repl, matchnum, string) */
+ awk_sub(an[0], as[1], /*matchnum:*/getvar_i(av[2]), /*src:*/av[3], /*dst:*/res/*, TRUE*/);
break;
- case B_gs:
- setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
+ case B_gs: /* gsub(regex, repl, string) */
+ setvar_i(res, awk_sub(an[0], as[1], /*matchnum:all*/0, /*src:*/av[2], /*dst:*/av[2]/*, FALSE*/));
break;
- case B_su:
- setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
+ case B_su: /* sub(regex, repl, string) */
+ setvar_i(res, awk_sub(an[0], as[1], /*matchnum:first*/1, /*src:*/av[2], /*dst:*/av[2]/*, FALSE*/));
break;
}
--
2.46.0
From 113685fbcd4c3432ec9b640583d50ba8da2102e8 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Wed, 7 Jun 2023 10:54:34 +0200
Subject: [PATCH 13/19] awk: fix SEGV on read error in -f PROGFILE
function old new delta
awk_main 829 843 +14
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index b3871ffc5..df9b7fdc9 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -3609,8 +3609,6 @@ static var *evaluate(node *op, var *res)
#undef sreg
}
-/* -------- main & co. -------- */
-
static int awk_exit(void)
{
unsigned i;
@@ -3717,6 +3715,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
g_progname = llist_pop(&list_f);
fd = xopen_stdin(g_progname);
s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
+ if (!s)
+ bb_perror_msg_and_die("read error from '%s'", g_progname);
close(fd);
parse_program(s);
free(s);
--
2.46.0
From 2ca39ffd447ca874fcea933194829717d5573247 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Thu, 8 Jun 2023 10:42:39 +0200
Subject: [PATCH 14/19] awk: fix subst code to handle "start of word" pattern
correctly (needs REG_STARTEND)
function old new delta
awk_sub 637 714 +77
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 49 ++++++++++++++++++++++++++++++++++++-------------
1 file changed, 36 insertions(+), 13 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index df9b7fdc9..171f0a7ea 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2504,17 +2504,46 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
regex_t sreg, *regex;
/* True only if called to implement gensub(): */
int subexp = (src != dest);
-
+#if defined(REG_STARTEND)
+ const char *src_string;
+ size_t src_strlen;
+ regexec_flags = REG_STARTEND;
+#else
+ regexec_flags = 0;
+#endif
resbuf = NULL;
residx = 0;
match_no = 0;
- regexec_flags = 0;
regex = as_regex(rn, &sreg);
sp = getvar_s(src ? src : intvar[F0]);
+#if defined(REG_STARTEND)
+ src_string = sp;
+ src_strlen = strlen(src_string);
+#endif
replen = strlen(repl);
- while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
- int so = pmatch[0].rm_so;
- int eo = pmatch[0].rm_eo;
+ for (;;) {
+ int so, eo;
+
+#if defined(REG_STARTEND)
+// REG_STARTEND: "This flag is a BSD extension, not present in POSIX"
+ size_t start_ofs = sp - src_string;
+ pmatch[0].rm_so = start_ofs;
+ pmatch[0].rm_eo = src_strlen;
+ if (regexec(regex, src_string, 10, pmatch, regexec_flags) != 0)
+ break;
+ eo = pmatch[0].rm_eo - start_ofs;
+ so = pmatch[0].rm_so - start_ofs;
+#else
+// BUG:
+// gsub(/\<b*/,"") on "abc" matches empty string at "a...",
+// advances sp one char (see "Empty match" comment later) to "bc"
+// ... and erroneously matches "b" even though it is NOT at the word start.
+ enum { start_ofs = 0 };
+ if (regexec(regex, sp, 10, pmatch, regexec_flags) != 0)
+ break;
+ so = pmatch[0].rm_so;
+ eo = pmatch[0].rm_eo;
+#endif
//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
@@ -2543,7 +2572,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
}
n = pmatch[j].rm_eo - pmatch[j].rm_so;
resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
- memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
+ memcpy(resbuf + residx, sp + pmatch[j].rm_so - start_ofs, n);
residx += n;
} else
resbuf[residx++] = c;
@@ -2557,12 +2586,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
if (eo == so) {
/* Empty match (e.g. "b*" will match anywhere).
* Advance by one char. */
-//BUG (bug 1333):
-//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
-//... and will erroneously match "b" even though it is NOT at the word start.
-//we need REG_NOTBOW but it does not exist...
-//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
-//it should be able to do it correctly.
/* Subtle: this is safe only because
* qrealloc allocated at least one extra byte */
resbuf[residx] = *sp;
@@ -2571,7 +2594,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
sp++;
residx++;
}
- regexec_flags = REG_NOTBOL;
+ regexec_flags |= REG_NOTBOL;
}
resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
--
2.46.0
From 5353df91cba7b397b9407701681708d0a1518df6 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 10 Jul 2023 17:25:21 +0200
Subject: [PATCH 15/19] Update applet size estimates
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/editors/awk.c b/editors/awk.c
index 171f0a7ea..efdff2778 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -7,7 +7,7 @@
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
*/
//config:config AWK
-//config: bool "awk (23 kb)"
+//config: bool "awk (24 kb)"
//config: default y
//config: help
//config: Awk is used as a pattern scanning and processing language.
--
2.46.0
From 92ab29fcf04bc3ff3d3ad897f1c2463d8b8d1410 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Mon, 2 Oct 2023 15:24:06 +0200
Subject: [PATCH 16/19] awk: implement -E; do not reorder -f and -e
function old new delta
awk_main 843 891 +48
next_input_file 243 261 +18
packed_usage 34631 34638 +7
.rodata 105391 105390 -1
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/1 up/down: 73/-1) Total: 72 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 113 +++++++++++++++++++++++++++++---------------------
1 file changed, 65 insertions(+), 48 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index efdff2778..bc95c4155 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -40,7 +40,7 @@
//usage:#define awk_full_usage "\n\n"
//usage: " -v VAR=VAL Set variable"
//usage: "\n -F SEP Use SEP as field separator"
-//usage: "\n -f FILE Read program from FILE"
+//usage: "\n -f/-E FILE Read program from FILE"
//usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
//usage: "\n -e AWK_PROGRAM"
//usage: )
@@ -76,8 +76,8 @@
* 1: -argz
*/
#define OPTSTR_AWK "+" \
- "F:v:*f:*" \
- IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
+ "F:v:f:" \
+ IF_FEATURE_AWK_GNU_EXTENSIONS("e:E:") \
"W:"
enum {
OPTBIT_F, /* define field separator */
@@ -560,6 +560,7 @@ struct globals {
var *Fields;
char *g_pos;
char g_saved_ch;
+ smallint got_program;
smallint icase;
smallint exiting;
smallint nextrec;
@@ -635,6 +636,7 @@ struct globals2 {
#define Fields (G1.Fields )
#define g_pos (G1.g_pos )
#define g_saved_ch (G1.g_saved_ch )
+#define got_program (G1.got_program )
#define icase (G1.icase )
#define exiting (G1.exiting )
#define nextrec (G1.nextrec )
@@ -2899,11 +2901,13 @@ static int next_input_file(void)
}
fname = getvar_s(findvar(iamarray(intvar[ARGV]), utoa(argind)));
if (fname && *fname) {
- /* "If a filename on the command line has the form
- * var=val it is treated as a variable assignment"
- */
- if (try_to_assign(fname))
- continue;
+ if (got_program != 2) { /* there was no -E option */
+ /* "If a filename on the command line has the form
+ * var=val it is treated as a variable assignment"
+ */
+ if (try_to_assign(fname))
+ continue;
+ }
iF.F = xfopen_stdin(fname);
setvar_i(intvar[ARGIND], argind);
break;
@@ -3659,13 +3663,7 @@ static int awk_exit(void)
int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int awk_main(int argc UNUSED_PARAM, char **argv)
{
- unsigned opt;
- char *opt_F;
- llist_t *list_v = NULL;
- llist_t *list_f = NULL;
-#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
- llist_t *list_e = NULL;
-#endif
+ int ch;
int i;
INIT_G();
@@ -3714,49 +3712,68 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
}
}
}
- opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
- argv += optind;
- //argc -= optind;
- if (opt & OPT_W)
- bb_simple_error_msg("warning: option -W is ignored");
- if (opt & OPT_F) {
- unescape_string_in_place(opt_F);
- setvar_s(intvar[FS], opt_F);
- }
- while (list_v) {
- if (!try_to_assign(llist_pop(&list_v)))
- bb_show_usage();
- }
- /* Parse all supplied programs */
fnhash = hash_init();
ahash = hash_init();
- while (list_f) {
- int fd;
- char *s;
- g_progname = llist_pop(&list_f);
- fd = xopen_stdin(g_progname);
- s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
- if (!s)
- bb_perror_msg_and_die("read error from '%s'", g_progname);
- close(fd);
- parse_program(s);
- free(s);
- }
- g_progname = "cmd. line";
+ /* Cannot use getopt32: need to preserve order of -e / -f / -E / -i */
+ while ((ch = getopt(argc, argv, OPTSTR_AWK)) >= 0) {
+ switch (ch) {
+ case 'F':
+ unescape_string_in_place(optarg);
+ setvar_s(intvar[FS], optarg);
+ break;
+ case 'v':
+ if (!try_to_assign(optarg))
+ bb_show_usage();
+ break;
+//TODO: implement -i LIBRARY, it is easy-ish
+ case 'E':
+ case 'f': {
+ int fd;
+ char *s;
+ g_progname = optarg;
+ fd = xopen_stdin(g_progname);
+ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
+ if (!s)
+ bb_perror_msg_and_die("read error from '%s'", g_progname);
+ close(fd);
+ parse_program(s);
+ free(s);
+ got_program = 1;
+ if (ch == 'E') {
+ got_program = 2;
+ goto stop_option_parsing;
+ }
+ break;
+ }
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
- while (list_e) {
- parse_program(llist_pop(&list_e));
- }
+ case 'e':
+ g_progname = "cmd. line";
+ parse_program(optarg);
+ got_program = 1;
+ break;
#endif
-//FIXME: preserve order of -e and -f
-//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
- if (!(opt & (OPT_f | OPT_e))) {
+ case 'W':
+ bb_simple_error_msg("warning: option -W is ignored");
+ break;
+ default:
+//bb_error_msg("ch:%d", ch);
+ bb_show_usage();
+ }
+ }
+ stop_option_parsing:
+
+ argv += optind;
+ //argc -= optind;
+
+ if (!got_program) {
if (!*argv)
bb_show_usage();
+ g_progname = "cmd. line";
parse_program(*argv++);
}
+
/* Free unused parse structures */
//hash_free(fnhash); // ~250 bytes when empty, used only for function names
//^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
--
2.46.0
From 789ccac7d9d1a9e433570ac9628992a01f946643 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Sun, 31 Dec 2023 15:49:54 +0100
Subject: [PATCH 17/19] awk: fix handling of empty fields
Patch by M Rubon <rubonmtz@gmail.com>:
Busybox awk handles references to empty (not provided in the input)
fields differently during the first line of input, as compared to
subsequent lines.
$ (echo a ; echo b) | awk '$2 != 0' #wrong
b
No field $2 value is provided in the input. When awk references field
$2 for the "a" line, it is seen to have a different behaviour than
when it is referenced for the "b" line.
Problem in BusyBox v1.36.1 embedded in OpenWrt 23.05.0
Same problem also in 21.02 versions of OpenWrt
Same problem in BusyBox v1.37.0.git
I get the correct expected output from Ubuntu gawk and Debian mawk,
and from my fix.
will@dev:~$ (echo a ; echo b) | awk '$2 != 0' #correct
a
b
will@dev:~/busybox$ (echo a ; echo b ) | ./busybox awk '$2 != 0' #fixed
a
b
I built and poked into the source code at editors/awk.c. The function
fsrealloc(int size) is core to allocating, initializing, reallocating,
and reinitializing fields, both real input line fields and imaginary
fields that the script references but do not exist in the input.
When fsrealloc() needs more field space than it has previously
allocated, it initializes those new fields differently than how they
are later reinitialized for the next input line. This works fine for
fields defined in the input, like $1, but does not work the first time
when there is no input for that field (e.g. field $99).
My one-line fix simply makes the initialization and clrvar()
reinitialization use the same value for .type. I am not sure if there
are regression tests to run, but I have not done those.
I'm not sure if I understand why clrvar() is not setting .type to a
default constant value, but in any case I have left that untouched.
function old new delta
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0) Total: 0 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index bc95c4155..aa485c782 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -555,8 +555,9 @@ struct globals {
//we are reusing ahash as fdhash, via define (see later)
const char *g_progname;
int g_lineno;
- int nfields;
- unsigned maxfields;
+ int num_fields; /* number of existing $N's */
+ unsigned num_alloc_fields; /* current size of Fields[] */
+ /* NB: Fields[0] corresponds to $1, not to $0 */
var *Fields;
char *g_pos;
char g_saved_ch;
@@ -631,8 +632,8 @@ struct globals2 {
// for fdhash in execution stage.
#define g_progname (G1.g_progname )
#define g_lineno (G1.g_lineno )
-#define nfields (G1.nfields )
-#define maxfields (G1.maxfields )
+#define num_fields (G1.num_fields )
+#define num_alloc_fields (G1.num_alloc_fields)
#define Fields (G1.Fields )
#define g_pos (G1.g_pos )
#define g_saved_ch (G1.g_saved_ch )
@@ -1966,30 +1967,30 @@ static void fsrealloc(int size)
{
int i, newsize;
- if ((unsigned)size >= maxfields) {
+ if ((unsigned)size >= num_alloc_fields) {
/* Sanity cap, easier than catering for over/underflows */
if ((unsigned)size > 0xffffff)
bb_die_memory_exhausted();
- i = maxfields;
- maxfields = size + 16;
+ i = num_alloc_fields;
+ num_alloc_fields = size + 16;
- newsize = maxfields * sizeof(Fields[0]);
+ newsize = num_alloc_fields * sizeof(Fields[0]);
debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
Fields = xrealloc(Fields, newsize);
debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
/* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
- for (; i < maxfields; i++) {
- Fields[i].type = VF_SPECIAL;
+ for (; i < num_alloc_fields; i++) {
+ Fields[i].type = VF_SPECIAL | VF_DIRTY;
Fields[i].string = NULL;
}
}
- /* if size < nfields, clear extra field variables */
- for (i = size; i < nfields; i++) {
+ /* if size < num_fields, clear extra field variables */
+ for (i = size; i < num_fields; i++) {
clrvar(Fields + i);
}
- nfields = size;
+ num_fields = size;
}
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
@@ -2126,7 +2127,7 @@ static void split_f0(void)
/* set NF manually to avoid side effects */
clrvar(intvar[NF]);
intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
- intvar[NF]->number = nfields;
+ intvar[NF]->number = num_fields;
#undef fstrings
}
@@ -2976,7 +2977,7 @@ static var *evaluate(node *op, var *res)
syntax_error(EMSG_TOO_FEW_ARGS);
L.v = evaluate(op1, TMPVAR0);
/* Does L.v point to $n variable? */
- if ((size_t)(L.v - Fields) < maxfields) {
+ if ((size_t)(L.v - Fields) < num_alloc_fields) {
/* yes, remember where Fields[] is */
old_Fields_ptr = Fields;
}
@@ -3517,7 +3518,7 @@ static var *evaluate(node *op, var *res)
res = intvar[F0];
} else {
split_f0();
- if (i > nfields)
+ if (i > num_fields)
fsrealloc(i);
res = &Fields[i - 1];
}
--
2.46.0
From e1a68741067167dc4837e0a26d3d5c318a631fc7 Mon Sep 17 00:00:00 2001
From: Ron Yorston <rmy@pobox.com>
Date: Fri, 19 Jan 2024 15:41:17 +0000
Subject: [PATCH 18/19] awk: fix segfault when compiled by clang
A 32-bit build of BusyBox using clang segfaulted in the test
"awk assign while assign". Specifically, on line 7 of the test
input, the adjustment of the L.v pointer when the Fields
array was reallocated
L.v += Fields - old_Fields_ptr;
was out by 4 bytes.
Rearrange the code so both gcc and clang generate code that works.
Signed-off-by: Ron Yorston <rmy@pobox.com>
Signed-off-by: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
Signed-off-by: Muhammad Falak R Wani <falakreyaz@gmail.com>
---
editors/awk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/editors/awk.c b/editors/awk.c
index aa485c782..0981c6735 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -3006,7 +3006,7 @@ static var *evaluate(node *op, var *res)
if (old_Fields_ptr) {
//if (old_Fields_ptr != Fields)
// debug_printf_eval("L.v moved\n");
- L.v += Fields - old_Fields_ptr;
+ L.v = Fields + (L.v - old_Fields_ptr);
}
if (opinfo & OF_STR2) {
R.s = getvar_s(R.v);
--
2.46.0