2023-02-24 03:09:24 +03:00
|
|
|
#include "git-compat-util.h"
|
2008-07-21 22:03:49 +04:00
|
|
|
#include "string-list.h"
|
2023-02-24 03:09:24 +03:00
|
|
|
#include "alloc.h"
|
2008-07-21 22:03:49 +04:00
|
|
|
|
string-list.[ch]: add a string_list_init_{nodup,dup}()
In order to use the new "memcpy() a 'blank' struct on the stack"
pattern for string_list_init(), and to make the macro initialization
consistent with the function initialization introduce two new
string_list_init_{nodup,dup}() functions. These are like the old
string_list_init() when called with a false and true second argument,
respectively.
I think this not only makes things more consistent, but also easier to
read. I often had to look up what the ", 0)" or ", 1)" in these
invocations meant, now it's right there in the function name, and
corresponds to the macros.
A subsequent commit will convert existing API users to this pattern,
but as this is a very common API let's leave a compatibility function
in place for later removal. This intermediate state also proves that
the compatibility function works.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-07-01 13:51:28 +03:00
|
|
|
void string_list_init_nodup(struct string_list *list)
|
|
|
|
{
|
|
|
|
struct string_list blank = STRING_LIST_INIT_NODUP;
|
|
|
|
memcpy(list, &blank, sizeof(*list));
|
|
|
|
}
|
|
|
|
|
|
|
|
void string_list_init_dup(struct string_list *list)
|
|
|
|
{
|
|
|
|
struct string_list blank = STRING_LIST_INIT_DUP;
|
|
|
|
memcpy(list, &blank, sizeof(*list));
|
|
|
|
}
|
|
|
|
|
2008-07-21 22:03:49 +04:00
|
|
|
/* if there is no exact match, point to the index where the entry could be
|
|
|
|
* inserted */
|
|
|
|
static int get_entry_index(const struct string_list *list, const char *string,
|
|
|
|
int *exact_match)
|
|
|
|
{
|
|
|
|
int left = -1, right = list->nr;
|
2013-01-08 00:24:55 +04:00
|
|
|
compare_strings_fn cmp = list->cmp ? list->cmp : strcmp;
|
2008-07-21 22:03:49 +04:00
|
|
|
|
|
|
|
while (left + 1 < right) {
|
2017-10-08 21:29:37 +03:00
|
|
|
int middle = left + (right - left) / 2;
|
2013-01-08 00:24:55 +04:00
|
|
|
int compare = cmp(string, list->items[middle].string);
|
2008-07-21 22:03:49 +04:00
|
|
|
if (compare < 0)
|
|
|
|
right = middle;
|
|
|
|
else if (compare > 0)
|
|
|
|
left = middle;
|
|
|
|
else {
|
|
|
|
*exact_match = 1;
|
|
|
|
return middle;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*exact_match = 0;
|
|
|
|
return right;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* returns -1-index if already exists */
|
2009-02-08 17:34:28 +03:00
|
|
|
static int add_entry(int insert_at, struct string_list *list, const char *string)
|
2008-07-21 22:03:49 +04:00
|
|
|
{
|
2009-02-08 17:34:28 +03:00
|
|
|
int exact_match = 0;
|
|
|
|
int index = insert_at != -1 ? insert_at : get_entry_index(list, string, &exact_match);
|
2008-07-21 22:03:49 +04:00
|
|
|
|
|
|
|
if (exact_match)
|
|
|
|
return -1 - index;
|
|
|
|
|
2017-04-14 22:51:52 +03:00
|
|
|
ALLOC_GROW(list->items, list->nr+1, list->alloc);
|
2008-07-21 22:03:49 +04:00
|
|
|
if (index < list->nr)
|
2017-07-15 23:00:45 +03:00
|
|
|
MOVE_ARRAY(list->items + index + 1, list->items + index,
|
|
|
|
list->nr - index);
|
2008-07-21 22:03:49 +04:00
|
|
|
list->items[index].string = list->strdup_strings ?
|
|
|
|
xstrdup(string) : (char *)string;
|
|
|
|
list->items[index].util = NULL;
|
|
|
|
list->nr++;
|
|
|
|
|
|
|
|
return index;
|
|
|
|
}
|
|
|
|
|
2010-06-26 03:41:35 +04:00
|
|
|
struct string_list_item *string_list_insert(struct string_list *list, const char *string)
|
2008-07-21 22:03:49 +04:00
|
|
|
{
|
2014-11-25 00:22:04 +03:00
|
|
|
int index = add_entry(-1, list, string);
|
2008-07-21 22:03:49 +04:00
|
|
|
|
|
|
|
if (index < 0)
|
|
|
|
index = -1 - index;
|
|
|
|
|
|
|
|
return list->items + index;
|
|
|
|
}
|
|
|
|
|
2017-04-20 02:13:21 +03:00
|
|
|
void string_list_remove(struct string_list *list, const char *string,
|
|
|
|
int free_util)
|
|
|
|
{
|
|
|
|
int exact_match;
|
|
|
|
int i = get_entry_index(list, string, &exact_match);
|
|
|
|
|
|
|
|
if (exact_match) {
|
|
|
|
if (list->strdup_strings)
|
|
|
|
free(list->items[i].string);
|
|
|
|
if (free_util)
|
|
|
|
free(list->items[i].util);
|
|
|
|
|
|
|
|
list->nr--;
|
2017-07-15 23:00:45 +03:00
|
|
|
MOVE_ARRAY(list->items + i, list->items + i + 1, list->nr - i);
|
2017-04-20 02:13:21 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-21 22:03:49 +04:00
|
|
|
/*
 * Report whether a sorted `list` contains `string`: returns 1 when an
 * exact match is found and 0 otherwise.
 */
int string_list_has_string(const struct string_list *list, const char *string)
{
	int found;

	/* Only the match flag matters here; the index is discarded. */
	(void)get_entry_index(list, string, &found);
	return found;
}
|
|
|
|
|
2009-02-08 17:34:28 +03:00
|
|
|
/*
 * Find where `string` would be inserted into a sorted `list`.
 *
 * If the string is absent, the insertion index is returned. If it is
 * already present, the result is negative: (-1 - index) when
 * `negative_existing_index` is non-zero, or plain -1 otherwise.
 */
int string_list_find_insert_index(const struct string_list *list, const char *string,
				  int negative_existing_index)
{
	int found;
	int pos = get_entry_index(list, string, &found);

	if (!found)
		return pos;
	if (negative_existing_index)
		return -1 - pos;
	return -1;
}
|
|
|
|
|
2010-06-26 03:41:37 +04:00
|
|
|
struct string_list_item *string_list_lookup(struct string_list *list, const char *string)
|
2008-07-21 22:03:49 +04:00
|
|
|
{
|
|
|
|
int exact_match, i = get_entry_index(list, string, &exact_match);
|
|
|
|
if (!exact_match)
|
|
|
|
return NULL;
|
|
|
|
return list->items + i;
|
|
|
|
}
|
|
|
|
|
2012-09-12 18:04:45 +04:00
|
|
|
void string_list_remove_duplicates(struct string_list *list, int free_util)
|
|
|
|
{
|
|
|
|
if (list->nr > 1) {
|
|
|
|
int src, dst;
|
2013-01-08 00:24:55 +04:00
|
|
|
compare_strings_fn cmp = list->cmp ? list->cmp : strcmp;
|
2012-09-12 18:04:45 +04:00
|
|
|
for (src = dst = 1; src < list->nr; src++) {
|
2013-01-08 00:24:55 +04:00
|
|
|
if (!cmp(list->items[dst - 1].string, list->items[src].string)) {
|
2012-09-12 18:04:45 +04:00
|
|
|
if (list->strdup_strings)
|
|
|
|
free(list->items[src].string);
|
|
|
|
if (free_util)
|
|
|
|
free(list->items[src].util);
|
|
|
|
} else
|
|
|
|
list->items[dst++] = list->items[src];
|
|
|
|
}
|
|
|
|
list->nr = dst;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-06-26 03:41:34 +04:00
|
|
|
int for_each_string_list(struct string_list *list,
|
|
|
|
string_list_each_func_t fn, void *cb_data)
|
2009-02-25 11:32:18 +03:00
|
|
|
{
|
|
|
|
int i, ret = 0;
|
|
|
|
for (i = 0; i < list->nr; i++)
|
|
|
|
if ((ret = fn(&list->items[i], cb_data)))
|
|
|
|
break;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2012-09-12 18:04:44 +04:00
|
|
|
void filter_string_list(struct string_list *list, int free_util,
|
|
|
|
string_list_each_func_t want, void *cb_data)
|
|
|
|
{
|
|
|
|
int src, dst = 0;
|
|
|
|
for (src = 0; src < list->nr; src++) {
|
|
|
|
if (want(&list->items[src], cb_data)) {
|
|
|
|
list->items[dst++] = list->items[src];
|
|
|
|
} else {
|
|
|
|
if (list->strdup_strings)
|
|
|
|
free(list->items[src].string);
|
|
|
|
if (free_util)
|
|
|
|
free(list->items[src].util);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
list->nr = dst;
|
|
|
|
}
|
|
|
|
|
2022-10-18 04:05:32 +03:00
|
|
|
/*
 * Filter callback: non-zero when the item's string has at least one
 * character. The callback data is not used.
 */
static int item_is_not_empty(struct string_list_item *item, void *data UNUSED)
{
	return item->string[0] != '\0';
}
|
|
|
|
|
2018-12-09 13:25:21 +03:00
|
|
|
void string_list_remove_empty_items(struct string_list *list, int free_util)
|
|
|
|
{
|
2012-11-04 11:07:06 +04:00
|
|
|
filter_string_list(list, free_util, item_is_not_empty, NULL);
|
|
|
|
}
|
|
|
|
|
2008-07-21 22:03:49 +04:00
|
|
|
void string_list_clear(struct string_list *list, int free_util)
|
|
|
|
{
|
|
|
|
if (list->items) {
|
|
|
|
int i;
|
|
|
|
if (list->strdup_strings) {
|
|
|
|
for (i = 0; i < list->nr; i++)
|
|
|
|
free(list->items[i].string);
|
|
|
|
}
|
|
|
|
if (free_util) {
|
|
|
|
for (i = 0; i < list->nr; i++)
|
|
|
|
free(list->items[i].util);
|
|
|
|
}
|
|
|
|
free(list->items);
|
|
|
|
}
|
|
|
|
list->items = NULL;
|
|
|
|
list->nr = list->alloc = 0;
|
|
|
|
}
|
|
|
|
|
2009-02-08 17:34:28 +03:00
|
|
|
/*
 * Release everything `list` holds and reset it to an empty list, using
 * a caller-supplied function to dispose of each item's util pointer.
 *
 * When `clearfunc` is non-NULL it is called as clearfunc(util, string)
 * for every item, before any string is freed. Strings are then freed
 * when list->strdup_strings is set, and the items array itself is
 * always freed. Afterwards items is NULL and both nr and alloc are 0;
 * strdup_strings is not modified.
 */
void string_list_clear_func(struct string_list *list, string_list_clear_func_t clearfunc)
{
	if (list->items) {
		/*
		 * size_t rather than int so the index can cover the full
		 * range of list->nr without a signed/unsigned comparison.
		 */
		size_t i;

		if (clearfunc) {
			for (i = 0; i < list->nr; i++)
				clearfunc(list->items[i].util, list->items[i].string);
		}
		if (list->strdup_strings) {
			for (i = 0; i < list->nr; i++)
				free(list->items[i].string);
		}
		free(list->items);
	}
	list->items = NULL;
	list->nr = list->alloc = 0;
}
|
|
|
|
|
string-list: introduce `string_list_setlen()`
It is sometimes useful to reduce the size of a `string_list`'s list of
items without having to re-allocate them. For example, doing the
following:
struct strbuf buf = STRBUF_INIT;
struct string_list parts = STRING_LIST_INIT_NODUP;
while (strbuf_getline(&buf, stdin) != EOF) {
parts.nr = 0;
string_list_split_in_place(&parts, buf.buf, ":", -1);
/* ... */
}
string_list_clear(&parts, 0);
is preferable over calling `string_list_clear()` on every iteration of
the loop. This is because `string_list_clear()` causes us to free our
existing `items` array. This means that every time we call
`string_list_split_in_place()`, the string-list internals re-allocate
the same size array.
Since in the above example we do not care about the individual parts
after processing each line, it is much more efficient to pretend that
there aren't any elements in the `string_list` by setting `list->nr` to
0 while leaving the list of elements allocated as-is.
This allows `string_list_split_in_place()` to overwrite any existing
entries without needing to free and re-allocate them.
However, setting `list->nr` manually is not safe in all instances. There
are a couple of cases worth worrying about:
- If the `string_list` is initialized with `strdup_strings`,
truncating the list can lead to overwriting strings which are
allocated elsewhere. If there aren't any other pointers to those
strings other than the ones inside of the `items` array, they will
become unreachable and leak.
(We could ourselves free the truncated items between
string_list->items[nr] and `list->nr`, but no present or future
callers would benefit from this additional complexity).
- If the given `nr` is larger than the current value of `list->nr`,
we'll trick the `string_list` into a state where it thinks there are
more items allocated than there actually are, which can lead to
undefined behavior if we try to read or write those entries.
Guard against both of these by introducing a helper function which
guards assignment of `list->nr` against each of the above conditions.
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-25 01:20:14 +03:00
|
|
|
void string_list_setlen(struct string_list *list, size_t nr)
|
|
|
|
{
|
|
|
|
if (list->strdup_strings)
|
|
|
|
BUG("cannot setlen a string_list which owns its entries");
|
|
|
|
if (nr > list->nr)
|
|
|
|
BUG("cannot grow a string_list with setlen");
|
|
|
|
list->nr = nr;
|
|
|
|
}
|
|
|
|
|
2012-09-12 18:04:42 +04:00
|
|
|
struct string_list_item *string_list_append_nodup(struct string_list *list,
|
|
|
|
char *string)
|
2008-07-21 22:03:49 +04:00
|
|
|
{
|
2012-09-12 18:04:42 +04:00
|
|
|
struct string_list_item *retval;
|
2008-07-21 22:03:49 +04:00
|
|
|
ALLOC_GROW(list->items, list->nr + 1, list->alloc);
|
2012-09-12 18:04:42 +04:00
|
|
|
retval = &list->items[list->nr++];
|
|
|
|
retval->string = string;
|
|
|
|
retval->util = NULL;
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct string_list_item *string_list_append(struct string_list *list,
|
|
|
|
const char *string)
|
|
|
|
{
|
|
|
|
return string_list_append_nodup(
|
|
|
|
list,
|
|
|
|
list->strdup_strings ? xstrdup(string) : (char *)string);
|
2008-07-21 22:03:49 +04:00
|
|
|
}
|
|
|
|
|
2018-07-09 22:25:36 +03:00
|
|
|
/*
|
|
|
|
* Encapsulate the compare function pointer because ISO C99 forbids
|
|
|
|
* casting from void * to a function pointer and vice versa.
|
|
|
|
*/
|
|
|
|
struct string_list_sort_ctx
|
|
|
|
{
|
|
|
|
compare_strings_fn cmp;
|
|
|
|
};
|
|
|
|
|
2017-01-22 20:57:09 +03:00
|
|
|
static int cmp_items(const void *a, const void *b, void *ctx)
|
2008-07-21 22:03:49 +04:00
|
|
|
{
|
2018-07-09 22:25:36 +03:00
|
|
|
struct string_list_sort_ctx *sort_ctx = ctx;
|
2008-07-21 22:03:49 +04:00
|
|
|
const struct string_list_item *one = a;
|
|
|
|
const struct string_list_item *two = b;
|
2018-07-09 22:25:36 +03:00
|
|
|
return sort_ctx->cmp(one->string, two->string);
|
2008-07-21 22:03:49 +04:00
|
|
|
}
|
|
|
|
|
2014-11-25 11:02:35 +03:00
|
|
|
void string_list_sort(struct string_list *list)
|
2008-07-21 22:03:49 +04:00
|
|
|
{
|
2018-07-09 22:25:36 +03:00
|
|
|
struct string_list_sort_ctx sort_ctx = {list->cmp ? list->cmp : strcmp};
|
|
|
|
|
|
|
|
QSORT_S(list->items, list->nr, cmp_items, &sort_ctx);
|
2008-07-21 22:03:49 +04:00
|
|
|
}
|
|
|
|
|
2010-03-24 10:16:02 +03:00
|
|
|
struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
|
|
|
|
const char *string)
|
2008-07-21 22:03:49 +04:00
|
|
|
{
|
2016-04-25 20:40:00 +03:00
|
|
|
struct string_list_item *item;
|
2013-01-08 00:24:55 +04:00
|
|
|
compare_strings_fn cmp = list->cmp ? list->cmp : strcmp;
|
|
|
|
|
2016-04-25 20:40:00 +03:00
|
|
|
for_each_string_list_item(item, list)
|
|
|
|
if (!cmp(string, item->string))
|
|
|
|
return item;
|
2010-03-24 10:16:02 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Report whether `list` (not necessarily sorted) contains `string`:
 * returns 1 when unsorted_string_list_lookup() finds it, 0 otherwise.
 */
int unsorted_string_list_has_string(struct string_list *list,
				    const char *string)
{
	return !!unsorted_string_list_lookup(list, string);
}
|
|
|
|
|
2011-08-12 09:20:00 +04:00
|
|
|
void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util)
|
|
|
|
{
|
|
|
|
if (list->strdup_strings)
|
|
|
|
free(list->items[i].string);
|
|
|
|
if (free_util)
|
|
|
|
free(list->items[i].util);
|
|
|
|
list->items[i] = list->items[list->nr-1];
|
|
|
|
list->nr--;
|
|
|
|
}
|
2012-09-12 18:04:43 +04:00
|
|
|
|
|
|
|
int string_list_split(struct string_list *list, const char *string,
|
|
|
|
int delim, int maxsplit)
|
|
|
|
{
|
|
|
|
int count = 0;
|
|
|
|
const char *p = string, *end;
|
|
|
|
|
|
|
|
if (!list->strdup_strings)
|
|
|
|
die("internal error in string_list_split(): "
|
|
|
|
"list->strdup_strings must be set");
|
|
|
|
for (;;) {
|
|
|
|
count++;
|
|
|
|
if (maxsplit >= 0 && count > maxsplit) {
|
|
|
|
string_list_append(list, p);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
end = strchr(p, delim);
|
|
|
|
if (end) {
|
|
|
|
string_list_append_nodup(list, xmemdupz(p, end - p));
|
|
|
|
p = end + 1;
|
|
|
|
} else {
|
|
|
|
string_list_append(list, p);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int string_list_split_in_place(struct string_list *list, char *string,
|
string-list: multi-delimiter `string_list_split_in_place()`
Enhance `string_list_split_in_place()` to accept multiple characters as
delimiters instead of a single character.
Instead of using `strchr(2)` to locate the first occurrence of the given
delimiter character, `string_list_split_in_place_multi()` uses
`strcspn(2)` to move past the initial segment of characters comprised of
any characters in the delimiting set.
When only a single delimiting character is provided, `strpbrk(2)` (which
is implemented with `strcspn(2)`) has equivalent performance to
`strchr(2)`. Modern `strcspn(2)` implementations treat an empty
delimiter or the singleton delimiter as a special case and fall back to
calling strchrnul(). Both glibc[1] and musl[2] implement `strcspn(2)`
this way.
This change is one step to removing `strtok(2)` from the tree. Note that
`string_list_split_in_place()` is not a strict replacement for
`strtok()`, since it will happily turn sequential delimiter characters
into empty entries in the resulting string_list. For example:
string_list_split_in_place(&xs, "foo:;:bar:;:baz", ":;", -1)
would yield a string list of:
["foo", "", "", "bar", "", "", "baz"]
Callers that wish to emulate the behavior of strtok(2) more directly
should call `string_list_remove_empty_items()` after splitting.
To avoid regressions for the new multi-character delimter cases, update
t0063 in this patch as well.
[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35
[2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-25 01:20:10 +03:00
|
|
|
const char *delim, int maxsplit)
|
2012-09-12 18:04:43 +04:00
|
|
|
{
|
|
|
|
int count = 0;
|
|
|
|
char *p = string, *end;
|
|
|
|
|
|
|
|
if (list->strdup_strings)
|
|
|
|
die("internal error in string_list_split_in_place(): "
|
|
|
|
"list->strdup_strings must not be set");
|
|
|
|
for (;;) {
|
|
|
|
count++;
|
|
|
|
if (maxsplit >= 0 && count > maxsplit) {
|
|
|
|
string_list_append(list, p);
|
|
|
|
return count;
|
|
|
|
}
|
string-list: multi-delimiter `string_list_split_in_place()`
Enhance `string_list_split_in_place()` to accept multiple characters as
delimiters instead of a single character.
Instead of using `strchr(2)` to locate the first occurrence of the given
delimiter character, `string_list_split_in_place_multi()` uses
`strcspn(2)` to move past the initial segment of characters comprised of
any characters in the delimiting set.
When only a single delimiting character is provided, `strpbrk(2)` (which
is implemented with `strcspn(2)`) has equivalent performance to
`strchr(2)`. Modern `strcspn(2)` implementations treat an empty
delimiter or the singleton delimiter as a special case and fall back to
calling strchrnul(). Both glibc[1] and musl[2] implement `strcspn(2)`
this way.
This change is one step to removing `strtok(2)` from the tree. Note that
`string_list_split_in_place()` is not a strict replacement for
`strtok()`, since it will happily turn sequential delimiter characters
into empty entries in the resulting string_list. For example:
string_list_split_in_place(&xs, "foo:;:bar:;:baz", ":;", -1)
would yield a string list of:
["foo", "", "", "bar", "", "", "baz"]
Callers that wish to emulate the behavior of strtok(2) more directly
should call `string_list_remove_empty_items()` after splitting.
To avoid regressions for the new multi-character delimter cases, update
t0063 in this patch as well.
[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35
[2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-25 01:20:10 +03:00
|
|
|
end = strpbrk(p, delim);
|
2012-09-12 18:04:43 +04:00
|
|
|
if (end) {
|
|
|
|
*end = '\0';
|
|
|
|
string_list_append(list, p);
|
|
|
|
p = end + 1;
|
|
|
|
} else {
|
|
|
|
string_list_append(list, p);
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|