2006-07-30 22:25:18 +04:00
|
|
|
#include "cache.h"
|
2020-07-28 23:23:39 +03:00
|
|
|
#include "strvec.h"
|
2017-08-03 21:19:59 +03:00
|
|
|
#include "repository.h"
|
2017-06-14 21:07:36 +03:00
|
|
|
#include "config.h"
|
2006-12-05 03:00:46 +03:00
|
|
|
#include "dir.h"
|
2006-07-30 22:25:18 +04:00
|
|
|
#include "tree.h"
|
|
|
|
#include "tree-walk.h"
|
2006-07-30 22:26:15 +04:00
|
|
|
#include "cache-tree.h"
|
2006-07-30 22:25:18 +04:00
|
|
|
#include "unpack-trees.h"
|
2007-04-18 22:27:45 +04:00
|
|
|
#include "progress.h"
|
2007-07-17 22:28:28 +04:00
|
|
|
#include "refs.h"
|
2009-03-14 07:24:08 +03:00
|
|
|
#include "attr.h"
|
2014-06-13 16:19:36 +04:00
|
|
|
#include "split-index.h"
|
2017-03-15 00:46:39 +03:00
|
|
|
#include "submodule.h"
|
|
|
|
#include "submodule-config.h"
|
2017-09-22 19:35:40 +03:00
|
|
|
#include "fsmonitor.h"
|
2018-05-16 02:42:15 +03:00
|
|
|
#include "object-store.h"
|
2019-06-25 16:40:31 +03:00
|
|
|
#include "promisor-remote.h"
|
2021-03-23 17:19:32 +03:00
|
|
|
#include "entry.h"
|
unpack-trees: add basic support for parallel checkout
This new interface allows us to enqueue some of the entries being
checked out to later uncompress them, apply in-process filters, and
write out the files in parallel. For now, the parallel checkout
machinery is enabled by default and there is no user configuration, but
run_parallel_checkout() just writes the queued entries in sequence
(without spawning additional workers). The next patch will actually
implement the parallelism and, later, we will make it configurable.
Note that, to avoid potential data races, not all entries are eligible
for parallel checkout. Also, paths that collide on disk (e.g.
case-sensitive paths in case-insensitive file systems), are detected by
the parallel checkout code and skipped, so that they can be safely
sequentially handled later. The collision detection works like the
following:
- If the collision was at basename (e.g. 'a/b' and 'a/B'), the framework
detects it by looking for EEXIST and EISDIR errors after an
open(O_CREAT | O_EXCL) failure.
- If the collision was at dirname (e.g. 'a/b' and 'A'), it is detected
at the has_dirs_only_path() check, which is done for the leading path
of each item in the parallel checkout queue.
Both verifications rely on the fact that, before enqueueing an entry for
parallel checkout, checkout_entry() makes sure that there is no file at
the entry's path and that its leading components are all real
directories. So, any later change in these conditions indicates that
there was a collision (either between two parallel-eligible entries or
between an eligible and an ineligible one).
After all parallel-eligible entries have been processed, the collided
(and thus, skipped) entries are sequentially fed to checkout_entry()
again. This is similar to the way the current code deals with
collisions, overwriting the previously checked out entries with the
subsequent ones. The only difference is that, since we no longer create
the files in the same order that they appear on index, we are not able
to determine which of the colliding entries will survive on disk (for
the classic code, it is always the last entry).
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 03:14:53 +03:00
|
|
|
#include "parallel-checkout.h"
|
2006-07-30 22:25:18 +04:00
|
|
|
|
2008-05-17 23:03:49 +04:00
|
|
|
/*
|
|
|
|
* Error messages expected by scripts out of plumbing commands such as
|
|
|
|
* read-tree. Non-scripted Porcelain is not required to use these messages
|
|
|
|
* and in fact are encouraged to reword them to better suit their particular
|
2010-08-11 12:38:05 +04:00
|
|
|
* situation better. See how "git checkout" and "git merge" replaces
|
2010-09-02 15:57:33 +04:00
|
|
|
* them using setup_unpack_trees_porcelain(), for example.
|
2008-05-17 23:03:49 +04:00
|
|
|
*/
|
2020-03-27 03:48:57 +03:00
|
|
|
static const char *unpack_plumbing_errors[NB_UNPACK_TREES_WARNING_TYPES] = {
|
2010-08-11 12:38:04 +04:00
|
|
|
/* ERROR_WOULD_OVERWRITE */
|
2008-05-17 23:03:49 +04:00
|
|
|
"Entry '%s' would be overwritten by merge. Cannot merge.",
|
|
|
|
|
2010-08-11 12:38:04 +04:00
|
|
|
/* ERROR_NOT_UPTODATE_FILE */
|
2008-05-17 23:03:49 +04:00
|
|
|
"Entry '%s' not uptodate. Cannot merge.",
|
|
|
|
|
2010-08-11 12:38:04 +04:00
|
|
|
/* ERROR_NOT_UPTODATE_DIR */
|
2008-05-17 23:03:49 +04:00
|
|
|
"Updating '%s' would lose untracked files in it",
|
|
|
|
|
2010-08-11 12:38:06 +04:00
|
|
|
/* ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN */
|
|
|
|
"Untracked working tree file '%s' would be overwritten by merge.",
|
2008-05-17 23:03:49 +04:00
|
|
|
|
2010-08-11 12:38:06 +04:00
|
|
|
/* ERROR_WOULD_LOSE_UNTRACKED_REMOVED */
|
|
|
|
"Untracked working tree file '%s' would be removed by merge.",
|
2008-05-17 23:03:49 +04:00
|
|
|
|
2010-08-11 12:38:04 +04:00
|
|
|
/* ERROR_BIND_OVERLAP */
|
2008-05-17 23:03:49 +04:00
|
|
|
"Entry '%s' overlaps with '%s'. Cannot bind.",
|
2009-08-20 17:47:09 +04:00
|
|
|
|
2020-03-27 03:48:55 +03:00
|
|
|
/* ERROR_WOULD_LOSE_SUBMODULE */
|
|
|
|
"Submodule '%s' cannot checkout new HEAD.",
|
2009-08-20 17:47:09 +04:00
|
|
|
|
2020-03-27 03:48:57 +03:00
|
|
|
/* NB_UNPACK_TREES_ERROR_TYPES; just a meta value */
|
|
|
|
"",
|
2010-08-11 12:38:06 +04:00
|
|
|
|
2020-03-27 03:48:56 +03:00
|
|
|
/* WARNING_SPARSE_NOT_UPTODATE_FILE */
|
2020-03-27 03:48:58 +03:00
|
|
|
"Path '%s' not uptodate; will not remove from working tree.",
|
2017-03-15 00:46:39 +03:00
|
|
|
|
2020-03-27 03:48:59 +03:00
|
|
|
/* WARNING_SPARSE_UNMERGED_FILE */
|
|
|
|
"Path '%s' unmerged; will not remove from working tree.",
|
|
|
|
|
2020-03-27 03:48:56 +03:00
|
|
|
/* WARNING_SPARSE_ORPHANED_NOT_OVERWRITTEN */
|
2020-03-27 03:48:58 +03:00
|
|
|
"Path '%s' already present; will not overwrite with sparse update.",
|
2008-05-17 23:03:49 +04:00
|
|
|
};
|
|
|
|
|
2010-08-11 12:38:04 +04:00
|
|
|
#define ERRORMSG(o,type) \
|
|
|
|
( ((o) && (o)->msgs[(type)]) \
|
|
|
|
? ((o)->msgs[(type)]) \
|
|
|
|
: (unpack_plumbing_errors[(type)]) )
|
2008-05-17 23:03:49 +04:00
|
|
|
|
unpack-trees: support super-prefix option
In the future we want to support working tree operations within submodules,
e.g. "git checkout --recurse-submodules", which will update the submodule
to the commit as recorded in its superproject. In the submodule the
unpack-tree operation is carried out as usual, but the reporting to the
user needs to prefix any path with the superproject. The mechanism for
this is the super-prefix. (see 74866d757, git: make super-prefix option)
Add support for the super-prefix option for commands that unpack trees
by wrapping any path output in unpacking trees in the newly introduced
super_prefixed function. This new function prefixes any path with the
super-prefix if there is one. Assuming the submodule case doesn't happen
in the majority of the cases, we'd want to have a fast behavior for no
super prefix, i.e. no reallocation/copying, but just returning path.
Another aspect of introducing the `super_prefixed` function is to consider
who owns the memory and if this is the right place where the path gets
modified. As the super prefix ought to change the output behavior only and
not the actual unpack tree part, it is fine to be that late in the line.
As we get passed in 'const char *path', we cannot change the path itself,
which means in case of a super prefix we have to copy over the path.
We need two static buffers in that function as the error messages
contain at most two paths.
For testing purposes enable it in read-tree, which has no output
of paths other than an unpack-trees.c. These are all converted in
this patch.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-18 04:05:20 +03:00
|
|
|
static const char *super_prefixed(const char *path)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* It is necessary and sufficient to have two static buffers
|
|
|
|
* here, as the return value of this function is fed to
|
|
|
|
* error() using the unpack_*_errors[] templates we see above.
|
|
|
|
*/
|
|
|
|
static struct strbuf buf[2] = {STRBUF_INIT, STRBUF_INIT};
|
|
|
|
static int super_prefix_len = -1;
|
|
|
|
static unsigned idx = ARRAY_SIZE(buf) - 1;
|
|
|
|
|
|
|
|
if (super_prefix_len < 0) {
|
|
|
|
const char *super_prefix = get_super_prefix();
|
|
|
|
if (!super_prefix) {
|
|
|
|
super_prefix_len = 0;
|
|
|
|
} else {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(buf); i++)
|
|
|
|
strbuf_addstr(&buf[i], super_prefix);
|
|
|
|
super_prefix_len = buf[0].len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!super_prefix_len)
|
|
|
|
return path;
|
|
|
|
|
|
|
|
if (++idx >= ARRAY_SIZE(buf))
|
|
|
|
idx = 0;
|
|
|
|
|
|
|
|
strbuf_setlen(&buf[idx], super_prefix_len);
|
|
|
|
strbuf_addstr(&buf[idx], path);
|
|
|
|
|
|
|
|
return buf[idx].buf;
|
|
|
|
}
|
|
|
|
|
2010-09-02 15:57:34 +04:00
|
|
|
void setup_unpack_trees_porcelain(struct unpack_trees_options *opts,
|
|
|
|
const char *cmd)
|
2010-09-02 15:57:33 +04:00
|
|
|
{
|
2010-11-15 22:52:19 +03:00
|
|
|
int i;
|
2010-09-02 15:57:34 +04:00
|
|
|
const char **msgs = opts->msgs;
|
2010-09-02 15:57:33 +04:00
|
|
|
const char *msg;
|
2014-06-19 00:02:13 +04:00
|
|
|
|
2020-07-28 23:25:12 +03:00
|
|
|
strvec_init(&opts->msgs_to_free);
|
2018-05-21 17:54:28 +03:00
|
|
|
|
2016-05-13 02:16:26 +03:00
|
|
|
if (!strcmp(cmd, "checkout"))
|
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("Your local changes to the following files would be overwritten by checkout:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please commit your changes or stash them before you switch branches.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("Your local changes to the following files would be overwritten by checkout:\n%%s");
|
|
|
|
else if (!strcmp(cmd, "merge"))
|
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("Your local changes to the following files would be overwritten by merge:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please commit your changes or stash them before you merge.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("Your local changes to the following files would be overwritten by merge:\n%%s");
|
2010-09-02 15:57:33 +04:00
|
|
|
else
|
2016-05-13 02:16:26 +03:00
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("Your local changes to the following files would be overwritten by %s:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please commit your changes or stash them before you %s.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("Your local changes to the following files would be overwritten by %s:\n%%s");
|
2014-06-19 00:02:13 +04:00
|
|
|
msgs[ERROR_WOULD_OVERWRITE] = msgs[ERROR_NOT_UPTODATE_FILE] =
|
2020-07-28 23:25:12 +03:00
|
|
|
strvec_pushf(&opts->msgs_to_free, msg, cmd, cmd);
|
2010-09-02 15:57:33 +04:00
|
|
|
|
|
|
|
msgs[ERROR_NOT_UPTODATE_DIR] =
|
2016-12-02 22:17:41 +03:00
|
|
|
_("Updating the following directories would lose untracked files in them:\n%s");
|
2010-09-02 15:57:33 +04:00
|
|
|
|
2016-05-13 02:16:26 +03:00
|
|
|
if (!strcmp(cmd, "checkout"))
|
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("The following untracked working tree files would be removed by checkout:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please move or remove them before you switch branches.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("The following untracked working tree files would be removed by checkout:\n%%s");
|
|
|
|
else if (!strcmp(cmd, "merge"))
|
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("The following untracked working tree files would be removed by merge:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please move or remove them before you merge.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("The following untracked working tree files would be removed by merge:\n%%s");
|
2010-09-02 15:57:33 +04:00
|
|
|
else
|
2016-05-13 02:16:26 +03:00
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("The following untracked working tree files would be removed by %s:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please move or remove them before you %s.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("The following untracked working tree files would be removed by %s:\n%%s");
|
2018-05-21 17:54:28 +03:00
|
|
|
msgs[ERROR_WOULD_LOSE_UNTRACKED_REMOVED] =
|
2020-07-28 23:25:12 +03:00
|
|
|
strvec_pushf(&opts->msgs_to_free, msg, cmd, cmd);
|
2016-05-13 02:16:26 +03:00
|
|
|
|
|
|
|
if (!strcmp(cmd, "checkout"))
|
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("The following untracked working tree files would be overwritten by checkout:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please move or remove them before you switch branches.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("The following untracked working tree files would be overwritten by checkout:\n%%s");
|
|
|
|
else if (!strcmp(cmd, "merge"))
|
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("The following untracked working tree files would be overwritten by merge:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please move or remove them before you merge.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("The following untracked working tree files would be overwritten by merge:\n%%s");
|
|
|
|
else
|
|
|
|
msg = advice_commit_before_merge
|
|
|
|
? _("The following untracked working tree files would be overwritten by %s:\n%%s"
|
2016-06-25 09:34:04 +03:00
|
|
|
"Please move or remove them before you %s.")
|
2016-05-13 02:16:26 +03:00
|
|
|
: _("The following untracked working tree files would be overwritten by %s:\n%%s");
|
2018-05-21 17:54:28 +03:00
|
|
|
msgs[ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN] =
|
2020-07-28 23:25:12 +03:00
|
|
|
strvec_pushf(&opts->msgs_to_free, msg, cmd, cmd);
|
2010-09-02 15:57:33 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Special case: ERROR_BIND_OVERLAP refers to a pair of paths, we
|
|
|
|
* cannot easily display it as a list.
|
|
|
|
*/
|
2016-04-09 23:38:39 +03:00
|
|
|
msgs[ERROR_BIND_OVERLAP] = _("Entry '%s' overlaps with '%s'. Cannot bind.");
|
2010-09-02 15:57:33 +04:00
|
|
|
|
2017-03-15 00:46:39 +03:00
|
|
|
msgs[ERROR_WOULD_LOSE_SUBMODULE] =
|
2017-03-30 01:34:24 +03:00
|
|
|
_("Cannot update submodule:\n%s");
|
2010-09-02 20:08:15 +04:00
|
|
|
|
2020-03-27 03:48:56 +03:00
|
|
|
msgs[WARNING_SPARSE_NOT_UPTODATE_FILE] =
|
2020-03-27 03:48:58 +03:00
|
|
|
_("The following paths are not up to date and were left despite sparse patterns:\n%s");
|
2020-03-27 03:48:59 +03:00
|
|
|
msgs[WARNING_SPARSE_UNMERGED_FILE] =
|
|
|
|
_("The following paths are unmerged and were left despite sparse patterns:\n%s");
|
2020-03-27 03:48:56 +03:00
|
|
|
msgs[WARNING_SPARSE_ORPHANED_NOT_OVERWRITTEN] =
|
2020-03-27 03:48:58 +03:00
|
|
|
_("The following paths were already present and thus not updated despite sparse patterns:\n%s");
|
2010-09-02 20:08:15 +04:00
|
|
|
|
|
|
|
opts->show_all_errors = 1;
|
2010-11-15 22:52:19 +03:00
|
|
|
/* rejected paths may not have a static buffer */
|
|
|
|
for (i = 0; i < ARRAY_SIZE(opts->unpack_rejects); i++)
|
|
|
|
opts->unpack_rejects[i].strdup_strings = 1;
|
2010-09-02 15:57:33 +04:00
|
|
|
}
|
|
|
|
|
2018-05-21 17:54:28 +03:00
|
|
|
void clear_unpack_trees_porcelain(struct unpack_trees_options *opts)
|
|
|
|
{
|
2020-07-28 23:25:12 +03:00
|
|
|
strvec_clear(&opts->msgs_to_free);
|
2018-05-21 17:54:28 +03:00
|
|
|
memset(opts->msgs, 0, sizeof(opts->msgs));
|
|
|
|
}
|
|
|
|
|
2014-11-24 21:36:51 +03:00
|
|
|
static int do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
|
unpack-trees: plug minor memory leak
The allocations made by unpack_nondirectories() using create_ce_entry()
are never freed.
In the non-merge case, we duplicate them using add_entry() and later
only look at the first allocated element (src[0]), perhaps even only
by mistake. Split out the actual addition from add_entry() into the
new helper do_add_entry() and call this non-duplicating function
instead of add_entry() to avoid the leak.
Valgrind reports this for the command "git archive v1.7.9" without
the patch:
==13372== LEAK SUMMARY:
==13372== definitely lost: 230,986 bytes in 2,325 blocks
==13372== indirectly lost: 0 bytes in 0 blocks
==13372== possibly lost: 98 bytes in 1 blocks
==13372== still reachable: 2,259,198 bytes in 3,243 blocks
==13372== suppressed: 0 bytes in 0 blocks
And with the patch applied:
==13375== LEAK SUMMARY:
==13375== definitely lost: 65 bytes in 1 blocks
==13375== indirectly lost: 0 bytes in 0 blocks
==13375== possibly lost: 0 bytes in 0 blocks
==13375== still reachable: 2,364,417 bytes in 3,245 blocks
==13375== suppressed: 0 bytes in 0 blocks
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-06 23:37:23 +04:00
|
|
|
unsigned int set, unsigned int clear)
|
2007-08-10 23:15:54 +04:00
|
|
|
{
|
2013-11-14 23:22:27 +04:00
|
|
|
clear |= CE_HASHED;
|
2008-03-07 05:12:28 +03:00
|
|
|
|
2010-07-31 10:14:27 +04:00
|
|
|
if (set & CE_REMOVE)
|
|
|
|
set |= CE_WT_REMOVE;
|
|
|
|
|
unpack-trees: plug minor memory leak
The allocations made by unpack_nondirectories() using create_ce_entry()
are never freed.
In the non-merge case, we duplicate them using add_entry() and later
only look at the first allocated element (src[0]), perhaps even only
by mistake. Split out the actual addition from add_entry() into the
new helper do_add_entry() and call this non-duplicating function
instead of add_entry() to avoid the leak.
Valgrind reports this for the command "git archive v1.7.9" without
the patch:
==13372== LEAK SUMMARY:
==13372== definitely lost: 230,986 bytes in 2,325 blocks
==13372== indirectly lost: 0 bytes in 0 blocks
==13372== possibly lost: 98 bytes in 1 blocks
==13372== still reachable: 2,259,198 bytes in 3,243 blocks
==13372== suppressed: 0 bytes in 0 blocks
And with the patch applied:
==13375== LEAK SUMMARY:
==13375== definitely lost: 65 bytes in 1 blocks
==13375== indirectly lost: 0 bytes in 0 blocks
==13375== possibly lost: 0 bytes in 0 blocks
==13375== still reachable: 2,364,417 bytes in 3,245 blocks
==13375== suppressed: 0 bytes in 0 blocks
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-06 23:37:23 +04:00
|
|
|
ce->ce_flags = (ce->ce_flags & ~clear) | set;
|
2014-11-24 21:36:51 +03:00
|
|
|
return add_index_entry(&o->result, ce,
|
|
|
|
ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
|
unpack-trees: plug minor memory leak
The allocations made by unpack_nondirectories() using create_ce_entry()
are never freed.
In the non-merge case, we duplicate them using add_entry() and later
only look at the first allocated element (src[0]), perhaps even only
by mistake. Split out the actual addition from add_entry() into the
new helper do_add_entry() and call this non-duplicating function
instead of add_entry() to avoid the leak.
Valgrind reports this for the command "git archive v1.7.9" without
the patch:
==13372== LEAK SUMMARY:
==13372== definitely lost: 230,986 bytes in 2,325 blocks
==13372== indirectly lost: 0 bytes in 0 blocks
==13372== possibly lost: 98 bytes in 1 blocks
==13372== still reachable: 2,259,198 bytes in 3,243 blocks
==13372== suppressed: 0 bytes in 0 blocks
And with the patch applied:
==13375== LEAK SUMMARY:
==13375== definitely lost: 65 bytes in 1 blocks
==13375== indirectly lost: 0 bytes in 0 blocks
==13375== possibly lost: 0 bytes in 0 blocks
==13375== still reachable: 2,364,417 bytes in 3,245 blocks
==13375== suppressed: 0 bytes in 0 blocks
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-06 23:37:23 +04:00
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:53 +04:00
|
|
|
static void add_entry(struct unpack_trees_options *o,
|
|
|
|
const struct cache_entry *ce,
|
|
|
|
unsigned int set, unsigned int clear)
|
|
|
|
{
|
block alloc: allocate cache entries from mem_pool
When reading large indexes from disk, a portion of the time is
dominated in malloc() calls. This can be mitigated by allocating a
large block of memory and manage it ourselves via memory pools.
This change moves the cache entry allocation to be on top of memory
pools.
Design:
The index_state struct will gain a notion of an associated memory_pool
from which cache_entries will be allocated from. When reading in the
index from disk, we have information on the number of entries and
their size, which can guide us in deciding how large our initial
memory allocation should be. When an index is discarded, the
associated memory_pool will be discarded as well - so the lifetime of
a cache_entry is tied to the lifetime of the index_state that it was
allocated for.
In the case of a Split Index, the following rules are followed. 1st,
some terminology is defined:
Terminology:
- 'the_index': represents the logical view of the index
- 'split_index': represents the "base" cache entries. Read from the
split index file.
'the_index' can reference a single split_index, as well as
cache_entries from the split_index. `the_index` will be discarded
before the `split_index` is. This means that when we are allocating
cache_entries in the presence of a split index, we need to allocate
the entries from the `split_index`'s memory pool. This allows us to
follow the pattern that `the_index` can reference cache_entries from
the `split_index`, and that the cache_entries will not be freed while
they are still being referenced.
Managing transient cache_entry structs:
Cache entries are usually allocated for an index, but this is not always
the case. Cache entries are sometimes allocated because this is the
type that the existing checkout_entry function works with. Because of
this, the existing code needs to handle cache entries associated with an
index / memory pool, and those that only exist transiently. Several
strategies were contemplated around how to handle this:
Chosen approach:
An extra field was added to the cache_entry type to track whether the
cache_entry was allocated from a memory pool or not. This is currently
an int field, as there are no more available bits in the existing
ce_flags bit field. If / when more bits are needed, this new field can
be turned into a proper bit field.
Alternatives:
1) Do not include any information about how the cache_entry was
allocated. Calling code would be responsible for tracking whether the
cache_entry needed to be freed or not.
Pro: No extra memory overhead to track this state
Con: Extra complexity in callers to handle this correctly.
The extra complexity and burden to not regress this behavior in the
future was more than we wanted.
2) cache_entry would gain knowledge about which mem_pool allocated it
Pro: Could (potentially) do extra logic to know when a mem_pool no
longer had references to any cache_entry
Con: cache_entry would grow heavier by a pointer, instead of int
We didn't see a tangible benefit to this approach
3) Do not add any extra information to a cache_entry, but when freeing a
cache entry, check if the memory exists in a region managed by existing
mem_pools.
Pro: No extra memory overhead to track state
Con: Extra computation is performed when freeing cache entries
We decided tracking and iterating over known memory pool regions was
less desirable than adding an extra field to track this stae.
Signed-off-by: Jameson Miller <jamill@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 22:49:37 +03:00
|
|
|
do_add_entry(o, dup_cache_entry(ce, &o->result), set, clear);
|
2007-08-10 23:15:54 +04:00
|
|
|
}
|
|
|
|
|
2010-08-11 12:38:07 +04:00
|
|
|
/*
|
|
|
|
* add error messages on path <path>
|
|
|
|
* corresponding to the type <e> with the message <msg>
|
|
|
|
* indicating if it should be display in porcelain or not
|
|
|
|
*/
|
|
|
|
static int add_rejected_path(struct unpack_trees_options *o,
|
|
|
|
enum unpack_trees_error_types e,
|
|
|
|
const char *path)
|
|
|
|
{
|
2019-03-22 12:31:36 +03:00
|
|
|
if (o->quiet)
|
2019-03-22 12:31:35 +03:00
|
|
|
return -1;
|
|
|
|
|
2010-08-11 12:38:07 +04:00
|
|
|
if (!o->show_all_errors)
|
unpack-trees: support super-prefix option
In the future we want to support working tree operations within submodules,
e.g. "git checkout --recurse-submodules", which will update the submodule
to the commit as recorded in its superproject. In the submodule the
unpack-tree operation is carried out as usual, but the reporting to the
user needs to prefix any path with the superproject. The mechanism for
this is the super-prefix. (see 74866d757, git: make super-prefix option)
Add support for the super-prefix option for commands that unpack trees
by wrapping any path output in unpacking trees in the newly introduced
super_prefixed function. This new function prefixes any path with the
super-prefix if there is one. Assuming the submodule case doesn't happen
in the majority of the cases, we'd want to have a fast behavior for no
super prefix, i.e. no reallocation/copying, but just returning path.
Another aspect of introducing the `super_prefixed` function is to consider
who owns the memory and if this is the right place where the path gets
modified. As the super prefix ought to change the output behavior only and
not the actual unpack tree part, it is fine to be that late in the line.
As we get passed in 'const char *path', we cannot change the path itself,
which means in case of a super prefix we have to copy over the path.
We need two static buffers in that function as the error messages
contain at most two paths.
For testing purposes enable it in read-tree, which has no output
of paths other than an unpack-trees.c. These are all converted in
this patch.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-18 04:05:20 +03:00
|
|
|
return error(ERRORMSG(o, e), super_prefixed(path));
|
2010-08-11 12:38:07 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Otherwise, insert in a list for future display by
|
2020-03-27 03:48:57 +03:00
|
|
|
* display_(error|warning)_msgs()
|
2010-08-11 12:38:07 +04:00
|
|
|
*/
|
2010-11-15 22:52:19 +03:00
|
|
|
string_list_append(&o->unpack_rejects[e], path);
|
2010-08-11 12:38:07 +04:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* display all the error messages stored in a nice way
|
|
|
|
*/
|
|
|
|
static void display_error_msgs(struct unpack_trees_options *o)
|
|
|
|
{
|
2020-03-27 03:48:57 +03:00
|
|
|
int e;
|
|
|
|
unsigned error_displayed = 0;
|
2010-08-11 12:38:07 +04:00
|
|
|
for (e = 0; e < NB_UNPACK_TREES_ERROR_TYPES; e++) {
|
2010-11-15 22:52:19 +03:00
|
|
|
struct string_list *rejects = &o->unpack_rejects[e];
|
2020-03-27 03:48:57 +03:00
|
|
|
|
2010-11-15 22:52:19 +03:00
|
|
|
if (rejects->nr > 0) {
|
2020-03-27 03:48:57 +03:00
|
|
|
int i;
|
2010-08-11 12:38:07 +04:00
|
|
|
struct strbuf path = STRBUF_INIT;
|
2020-03-27 03:48:57 +03:00
|
|
|
|
|
|
|
error_displayed = 1;
|
2010-11-15 22:52:19 +03:00
|
|
|
for (i = 0; i < rejects->nr; i++)
|
|
|
|
strbuf_addf(&path, "\t%s\n", rejects->items[i].string);
|
unpack-trees: support super-prefix option
In the future we want to support working tree operations within submodules,
e.g. "git checkout --recurse-submodules", which will update the submodule
to the commit as recorded in its superproject. In the submodule the
unpack-tree operation is carried out as usual, but the reporting to the
user needs to prefix any path with the superproject. The mechanism for
this is the super-prefix. (see 74866d757, git: make super-prefix option)
Add support for the super-prefix option for commands that unpack trees
by wrapping any path output in unpacking trees in the newly introduced
super_prefixed function. This new function prefixes any path with the
super-prefix if there is one. Assuming the submodule case doesn't happen
in the majority of the cases, we'd want to have a fast behavior for no
super prefix, i.e. no reallocation/copying, but just returning path.
Another aspect of introducing the `super_prefixed` function is to consider
who owns the memory and if this is the right place where the path gets
modified. As the super prefix ought to change the output behavior only and
not the actual unpack tree part, it is fine to be that late in the line.
As we get passed in 'const char *path', we cannot change the path itself,
which means in case of a super prefix we have to copy over the path.
We need two static buffers in that function as the error messages
contain at most two paths.
For testing purposes enable it in read-tree, which has no output
of paths other than an unpack-trees.c. These are all converted in
this patch.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-18 04:05:20 +03:00
|
|
|
error(ERRORMSG(o, e), super_prefixed(path.buf));
|
2010-08-11 12:38:07 +04:00
|
|
|
strbuf_release(&path);
|
|
|
|
}
|
2010-11-15 22:52:19 +03:00
|
|
|
string_list_clear(rejects, 0);
|
2010-08-11 12:38:07 +04:00
|
|
|
}
|
2020-03-27 03:48:57 +03:00
|
|
|
if (error_displayed)
|
2016-04-09 23:38:39 +03:00
|
|
|
fprintf(stderr, _("Aborting\n"));
|
2010-08-11 12:38:07 +04:00
|
|
|
}
|
|
|
|
|
2020-03-27 03:48:57 +03:00
|
|
|
/*
|
|
|
|
* display all the warning messages stored in a nice way
|
|
|
|
*/
|
|
|
|
static void display_warning_msgs(struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
int e;
|
|
|
|
unsigned warning_displayed = 0;
|
|
|
|
for (e = NB_UNPACK_TREES_ERROR_TYPES + 1;
|
|
|
|
e < NB_UNPACK_TREES_WARNING_TYPES; e++) {
|
|
|
|
struct string_list *rejects = &o->unpack_rejects[e];
|
|
|
|
|
|
|
|
if (rejects->nr > 0) {
|
|
|
|
int i;
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
|
|
|
|
warning_displayed = 1;
|
|
|
|
for (i = 0; i < rejects->nr; i++)
|
|
|
|
strbuf_addf(&path, "\t%s\n", rejects->items[i].string);
|
|
|
|
warning(ERRORMSG(o, e), super_prefixed(path.buf));
|
|
|
|
strbuf_release(&path);
|
|
|
|
}
|
|
|
|
string_list_clear(rejects, 0);
|
|
|
|
}
|
|
|
|
if (warning_displayed)
|
|
|
|
fprintf(stderr, _("After fixing the above paths, you may want to run `git sparse-checkout reapply`.\n"));
|
|
|
|
}
|
2017-03-15 00:46:39 +03:00
|
|
|
static int check_submodule_move_head(const struct cache_entry *ce,
|
|
|
|
const char *old_id,
|
|
|
|
const char *new_id,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2017-04-19 00:37:22 +03:00
|
|
|
unsigned flags = SUBMODULE_MOVE_HEAD_DRY_RUN;
|
2017-03-15 00:46:39 +03:00
|
|
|
const struct submodule *sub = submodule_from_ce(ce);
|
2017-08-03 21:19:53 +03:00
|
|
|
|
2017-03-15 00:46:39 +03:00
|
|
|
if (!sub)
|
|
|
|
return 0;
|
|
|
|
|
2017-04-19 00:37:22 +03:00
|
|
|
if (o->reset)
|
|
|
|
flags |= SUBMODULE_MOVE_HEAD_FORCE;
|
|
|
|
|
2017-08-03 21:19:53 +03:00
|
|
|
if (submodule_move_head(ce->name, old_id, new_id, flags))
|
2019-03-22 12:31:35 +03:00
|
|
|
return add_rejected_path(o, ERROR_WOULD_LOSE_SUBMODULE, ce->name);
|
2017-08-03 21:19:53 +03:00
|
|
|
return 0;
|
2017-03-15 00:46:39 +03:00
|
|
|
}
|
|
|
|
|
2017-08-03 21:19:59 +03:00
|
|
|
/*
|
2019-11-05 20:07:23 +03:00
|
|
|
* Perform the loading of the repository's gitmodules file. This function is
|
2017-08-03 21:19:59 +03:00
|
|
|
* used by 'check_update()' to perform loading of the gitmodules file in two
|
2019-11-05 20:07:23 +03:00
|
|
|
* different situations:
|
2017-08-03 21:19:59 +03:00
|
|
|
* (1) before removing entries from the working tree if the gitmodules file has
|
|
|
|
* been marked for removal. This situation is specified by 'state' == NULL.
|
|
|
|
* (2) before checking out entries to the working tree if the gitmodules file
|
|
|
|
* has been marked for update. This situation is specified by 'state' != NULL.
|
|
|
|
*/
|
|
|
|
static void load_gitmodules_file(struct index_state *index,
|
|
|
|
struct checkout *state)
|
2017-03-15 00:46:39 +03:00
|
|
|
{
|
2017-08-03 21:19:59 +03:00
|
|
|
int pos = index_name_pos(index, GITMODULES_FILE, strlen(GITMODULES_FILE));
|
|
|
|
|
|
|
|
if (pos >= 0) {
|
|
|
|
struct cache_entry *ce = index->cache[pos];
|
|
|
|
if (!state && ce->ce_flags & CE_WT_REMOVE) {
|
2020-01-16 05:39:55 +03:00
|
|
|
repo_read_gitmodules(the_repository, 0);
|
2017-08-03 21:19:59 +03:00
|
|
|
} else if (state && (ce->ce_flags & CE_UPDATE)) {
|
2018-03-29 01:35:28 +03:00
|
|
|
submodule_free(the_repository);
|
2018-11-13 21:28:00 +03:00
|
|
|
checkout_entry(ce, state, NULL, NULL);
|
2020-01-16 05:39:55 +03:00
|
|
|
repo_read_gitmodules(the_repository, 0);
|
2017-03-15 00:46:39 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-14 22:53:22 +03:00
|
|
|
static struct progress *get_progress(struct unpack_trees_options *o,
|
|
|
|
struct index_state *index)
|
2006-07-30 22:25:18 +04:00
|
|
|
{
|
2007-04-18 22:27:45 +04:00
|
|
|
unsigned cnt = 0, total = 0;
|
2017-01-09 22:46:19 +03:00
|
|
|
|
|
|
|
if (!o->update || !o->verbose_update)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
for (; cnt < index->cache_nr; cnt++) {
|
|
|
|
const struct cache_entry *ce = index->cache[cnt];
|
|
|
|
if (ce->ce_flags & (CE_UPDATE | CE_WT_REMOVE))
|
|
|
|
total++;
|
|
|
|
}
|
|
|
|
|
2019-03-29 13:39:19 +03:00
|
|
|
return start_delayed_progress(_("Updating files"), total);
|
2017-01-09 22:46:19 +03:00
|
|
|
}
|
|
|
|
|
2018-08-17 21:00:39 +03:00
|
|
|
static void setup_collided_checkout_detection(struct checkout *state,
|
|
|
|
struct index_state *index)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
state->clone = 1;
|
|
|
|
for (i = 0; i < index->cache_nr; i++)
|
|
|
|
index->cache[i]->ce_flags &= ~CE_MATCHED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void report_collided_checkout(struct index_state *index)
|
|
|
|
{
|
|
|
|
struct string_list list = STRING_LIST_INIT_NODUP;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < index->cache_nr; i++) {
|
|
|
|
struct cache_entry *ce = index->cache[i];
|
|
|
|
|
|
|
|
if (!(ce->ce_flags & CE_MATCHED))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
string_list_append(&list, ce->name);
|
|
|
|
ce->ce_flags &= ~CE_MATCHED;
|
|
|
|
}
|
|
|
|
|
|
|
|
list.cmp = fspathcmp;
|
|
|
|
string_list_sort(&list);
|
|
|
|
|
|
|
|
if (list.nr) {
|
|
|
|
warning(_("the following paths have collided (e.g. case-sensitive paths\n"
|
|
|
|
"on a case-insensitive filesystem) and only one from the same\n"
|
|
|
|
"colliding group is in the working tree:\n"));
|
|
|
|
|
|
|
|
for (i = 0; i < list.nr; i++)
|
|
|
|
fprintf(stderr, " '%s'\n", list.items[i].string);
|
|
|
|
}
|
|
|
|
|
|
|
|
string_list_clear(&list, 0);
|
|
|
|
}
|
|
|
|
|
2020-03-27 03:48:49 +03:00
|
|
|
static int check_updates(struct unpack_trees_options *o,
|
|
|
|
struct index_state *index)
|
2017-01-09 22:46:19 +03:00
|
|
|
{
|
|
|
|
unsigned cnt = 0;
|
2017-01-09 22:46:17 +03:00
|
|
|
int errs = 0;
|
2018-10-18 21:46:04 +03:00
|
|
|
struct progress *progress;
|
2017-01-09 22:46:17 +03:00
|
|
|
struct checkout state = CHECKOUT_INIT;
|
parallel-checkout: add configuration options
Make parallel checkout configurable by introducing two new settings:
checkout.workers and checkout.thresholdForParallelism. The first defines
the number of workers (where one means sequential checkout), and the
second defines the minimum number of entries to attempt parallel
checkout.
To decide the default value for checkout.workers, the parallel version
was benchmarked during three operations in the linux repo, with cold
cache: cloning v5.8, checking out v5.8 from v2.6.15 (checkout I) and
checking out v5.8 from v5.7 (checkout II). The four tables below show
the mean run times and standard deviations for 5 runs in: a local file
system on SSD, a local file system on HDD, a Linux NFS server, and
Amazon EFS (all on Linux). Each parallel checkout test was executed with
the number of workers that brings the best overall results in that
environment.
Local SSD:
Sequential 10 workers Speedup
Clone 8.805 s ± 0.043 s 3.564 s ± 0.041 s 2.47 ± 0.03
Checkout I 9.678 s ± 0.057 s 4.486 s ± 0.050 s 2.16 ± 0.03
Checkout II 5.034 s ± 0.072 s 3.021 s ± 0.038 s 1.67 ± 0.03
Local HDD:
Sequential 10 workers Speedup
Clone 32.288 s ± 0.580 s 30.724 s ± 0.522 s 1.05 ± 0.03
Checkout I 54.172 s ± 7.119 s 54.429 s ± 6.738 s 1.00 ± 0.18
Checkout II 40.465 s ± 2.402 s 38.682 s ± 1.365 s 1.05 ± 0.07
Linux NFS server (v4.1, on EBS, single availability zone):
Sequential 32 workers Speedup
Clone 240.368 s ± 6.347 s 57.349 s ± 0.870 s 4.19 ± 0.13
Checkout I 242.862 s ± 2.215 s 58.700 s ± 0.904 s 4.14 ± 0.07
Checkout II 65.751 s ± 1.577 s 23.820 s ± 0.407 s 2.76 ± 0.08
EFS (v4.1, replicated over multiple availability zones):
Sequential 32 workers Speedup
Clone 922.321 s ± 2.274 s 210.453 s ± 3.412 s 4.38 ± 0.07
Checkout I 1011.300 s ± 7.346 s 297.828 s ± 0.964 s 3.40 ± 0.03
Checkout II 294.104 s ± 1.836 s 126.017 s ± 1.190 s 2.33 ± 0.03
The above benchmarks show that parallel checkout is most effective on
repositories located on an SSD or over a distributed file system. For
local file systems on spinning disks, and/or older machines, the
parallelism does not always bring a good performance. For this reason,
the default value for checkout.workers is one, a.k.a. sequential
checkout.
To decide the default value for checkout.thresholdForParallelism,
another benchmark was executed in the "Local SSD" setup, where parallel
checkout showed to be beneficial. This time, we compared the runtime of
a `git checkout -f`, with and without parallelism, after randomly
removing an increasing number of files from the Linux working tree. The
"sequential fallback" column below corresponds to the executions where
checkout.workers was 10 but checkout.thresholdForParallelism was equal
to the number of to-be-updated files plus one (so that we end up writing
sequentially). Each test case was sampled 15 times, and each sample had
a randomly different set of files removed. Here are the results:
sequential fallback 10 workers speedup
10 files 772.3 ms ± 12.6 ms 769.0 ms ± 13.6 ms 1.00 ± 0.02
20 files 780.5 ms ± 15.8 ms 775.2 ms ± 9.2 ms 1.01 ± 0.02
50 files 806.2 ms ± 13.8 ms 767.4 ms ± 8.5 ms 1.05 ± 0.02
100 files 833.7 ms ± 21.4 ms 750.5 ms ± 16.8 ms 1.11 ± 0.04
200 files 897.6 ms ± 30.9 ms 730.5 ms ± 14.7 ms 1.23 ± 0.05
500 files 1035.4 ms ± 48.0 ms 677.1 ms ± 22.3 ms 1.53 ± 0.09
1000 files 1244.6 ms ± 35.6 ms 654.0 ms ± 38.3 ms 1.90 ± 0.12
2000 files 1488.8 ms ± 53.4 ms 658.8 ms ± 23.8 ms 2.26 ± 0.12
From the above numbers, 100 files seems to be a reasonable default value
for the threshold setting.
Note: Up to 1000 files, we observe a drop in the execution time of the
parallel code with an increase in the number of files. This is a rather
odd behavior, but it was observed in multiple repetitions. Above 1000
files, the execution time increases according to the number of files, as
one would expect.
About the test environments: Local SSD tests were executed on an
i7-7700HQ (4 cores with hyper-threading) running Manjaro Linux. Local
HDD tests were executed on an Intel(R) Xeon(R) E3-1230 (also 4 cores
with hyper-threading), HDD Seagate Barracuda 7200.14 SATA 3.1, running
Debian. NFS and EFS tests were executed on an Amazon EC2 c5n.xlarge
instance, with 4 vCPUs. The Linux NFS server was running on a m6g.large
instance with 2 vCPUSs and a 1 TB EBS GP2 volume. Before each timing,
the linux repository was removed (or checked out back to its previous
state), and `sync && sysctl vm.drop_caches=3` was executed.
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 03:14:55 +03:00
|
|
|
int i, pc_workers, pc_threshold;
|
2006-07-30 22:25:18 +04:00
|
|
|
|
2018-08-18 17:41:23 +03:00
|
|
|
trace_performance_enter();
|
2017-01-09 22:46:17 +03:00
|
|
|
state.force = 1;
|
|
|
|
state.quiet = 1;
|
|
|
|
state.refresh_cache = 1;
|
|
|
|
state.istate = index;
|
2020-03-16 21:05:04 +03:00
|
|
|
clone_checkout_metadata(&state.meta, &o->meta, NULL);
|
2006-07-30 22:25:18 +04:00
|
|
|
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
if (!o->update || o->dry_run) {
|
|
|
|
remove_marked_cache_entries(index, 0);
|
|
|
|
trace_performance_leave("check_updates");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-08-17 21:00:39 +03:00
|
|
|
if (o->clone)
|
|
|
|
setup_collided_checkout_detection(&state, index);
|
|
|
|
|
2020-05-14 22:53:22 +03:00
|
|
|
progress = get_progress(o, index);
|
2006-07-30 22:25:18 +04:00
|
|
|
|
2021-02-03 00:37:10 +03:00
|
|
|
/* Start with clean cache to avoid using any possibly outdated info. */
|
|
|
|
invalidate_lstat_cache();
|
|
|
|
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
git_attr_set_direction(GIT_ATTR_CHECKOUT);
|
2017-08-03 21:19:59 +03:00
|
|
|
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
if (should_update_submodules())
|
2017-08-03 21:19:59 +03:00
|
|
|
load_gitmodules_file(index, NULL);
|
|
|
|
|
2008-03-07 05:12:28 +03:00
|
|
|
for (i = 0; i < index->cache_nr; i++) {
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valueable than the summary above. But if
anyone wants to re-identify the above sites. Applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 19:29:00 +04:00
|
|
|
const struct cache_entry *ce = index->cache[i];
|
2006-07-30 22:25:18 +04:00
|
|
|
|
2009-08-20 17:47:06 +04:00
|
|
|
if (ce->ce_flags & CE_WT_REMOVE) {
|
|
|
|
display_progress(progress, ++cnt);
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
unlink_entry(ce);
|
2009-08-20 17:47:06 +04:00
|
|
|
}
|
2008-03-22 19:48:41 +03:00
|
|
|
}
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
|
2018-12-20 16:48:16 +03:00
|
|
|
remove_marked_cache_entries(index, 0);
|
2009-02-09 23:54:07 +03:00
|
|
|
remove_scheduled_dirs();
|
2008-03-22 19:48:41 +03:00
|
|
|
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
if (should_update_submodules())
|
2017-08-03 21:19:59 +03:00
|
|
|
load_gitmodules_file(index, &state);
|
2017-03-15 00:46:39 +03:00
|
|
|
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
if (has_promisor_remote()) {
|
2017-12-08 18:58:47 +03:00
|
|
|
/*
|
|
|
|
* Prefetch the objects that are to be checked out in the loop
|
|
|
|
* below.
|
|
|
|
*/
|
|
|
|
struct oid_array to_fetch = OID_ARRAY_INIT;
|
|
|
|
for (i = 0; i < index->cache_nr; i++) {
|
|
|
|
struct cache_entry *ce = index->cache[i];
|
2019-03-30 00:39:27 +03:00
|
|
|
|
|
|
|
if (!(ce->ce_flags & CE_UPDATE) ||
|
|
|
|
S_ISGITLINK(ce->ce_mode))
|
|
|
|
continue;
|
|
|
|
if (!oid_object_info_extended(the_repository, &ce->oid,
|
|
|
|
NULL,
|
|
|
|
OBJECT_INFO_FOR_PREFETCH))
|
|
|
|
continue;
|
|
|
|
oid_array_append(&to_fetch, &ce->oid);
|
2017-12-08 18:58:47 +03:00
|
|
|
}
|
2020-04-02 22:19:16 +03:00
|
|
|
promisor_remote_get_direct(the_repository,
|
|
|
|
to_fetch.oid, to_fetch.nr);
|
2018-03-25 19:31:48 +03:00
|
|
|
oid_array_clear(&to_fetch);
|
2017-12-08 18:58:47 +03:00
|
|
|
}
|
unpack-trees: add basic support for parallel checkout
This new interface allows us to enqueue some of the entries being
checked out to later uncompress them, apply in-process filters, and
write out the files in parallel. For now, the parallel checkout
machinery is enabled by default and there is no user configuration, but
run_parallel_checkout() just writes the queued entries in sequence
(without spawning additional workers). The next patch will actually
implement the parallelism and, later, we will make it configurable.
Note that, to avoid potential data races, not all entries are eligible
for parallel checkout. Also, paths that collide on disk (e.g.
case-sensitive paths in case-insensitive file systems), are detected by
the parallel checkout code and skipped, so that they can be safely
sequentially handled later. The collision detection works like the
following:
- If the collision was at basename (e.g. 'a/b' and 'a/B'), the framework
detects it by looking for EEXIST and EISDIR errors after an
open(O_CREAT | O_EXCL) failure.
- If the collision was at dirname (e.g. 'a/b' and 'A'), it is detected
at the has_dirs_only_path() check, which is done for the leading path
of each item in the parallel checkout queue.
Both verifications rely on the fact that, before enqueueing an entry for
parallel checkout, checkout_entry() makes sure that there is no file at
the entry's path and that its leading components are all real
directories. So, any later change in these conditions indicates that
there was a collision (either between two parallel-eligible entries or
between an eligible and an ineligible one).
After all parallel-eligible entries have been processed, the collided
(and thus, skipped) entries are sequentially fed to checkout_entry()
again. This is similar to the way the current code deals with
collisions, overwriting the previously checked out entries with the
subsequent ones. The only difference is that, since we no longer create
the files in the same order that they appear on index, we are not able
to determine which of the colliding entries will survive on disk (for
the classic code, it is always the last entry).
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 03:14:53 +03:00
|
|
|
|
parallel-checkout: add configuration options
Make parallel checkout configurable by introducing two new settings:
checkout.workers and checkout.thresholdForParallelism. The first defines
the number of workers (where one means sequential checkout), and the
second defines the minimum number of entries to attempt parallel
checkout.
To decide the default value for checkout.workers, the parallel version
was benchmarked during three operations in the linux repo, with cold
cache: cloning v5.8, checking out v5.8 from v2.6.15 (checkout I) and
checking out v5.8 from v5.7 (checkout II). The four tables below show
the mean run times and standard deviations for 5 runs in: a local file
system on SSD, a local file system on HDD, a Linux NFS server, and
Amazon EFS (all on Linux). Each parallel checkout test was executed with
the number of workers that brings the best overall results in that
environment.
Local SSD:
Sequential 10 workers Speedup
Clone 8.805 s ± 0.043 s 3.564 s ± 0.041 s 2.47 ± 0.03
Checkout I 9.678 s ± 0.057 s 4.486 s ± 0.050 s 2.16 ± 0.03
Checkout II 5.034 s ± 0.072 s 3.021 s ± 0.038 s 1.67 ± 0.03
Local HDD:
Sequential 10 workers Speedup
Clone 32.288 s ± 0.580 s 30.724 s ± 0.522 s 1.05 ± 0.03
Checkout I 54.172 s ± 7.119 s 54.429 s ± 6.738 s 1.00 ± 0.18
Checkout II 40.465 s ± 2.402 s 38.682 s ± 1.365 s 1.05 ± 0.07
Linux NFS server (v4.1, on EBS, single availability zone):
Sequential 32 workers Speedup
Clone 240.368 s ± 6.347 s 57.349 s ± 0.870 s 4.19 ± 0.13
Checkout I 242.862 s ± 2.215 s 58.700 s ± 0.904 s 4.14 ± 0.07
Checkout II 65.751 s ± 1.577 s 23.820 s ± 0.407 s 2.76 ± 0.08
EFS (v4.1, replicated over multiple availability zones):
Sequential 32 workers Speedup
Clone 922.321 s ± 2.274 s 210.453 s ± 3.412 s 4.38 ± 0.07
Checkout I 1011.300 s ± 7.346 s 297.828 s ± 0.964 s 3.40 ± 0.03
Checkout II 294.104 s ± 1.836 s 126.017 s ± 1.190 s 2.33 ± 0.03
The above benchmarks show that parallel checkout is most effective on
repositories located on an SSD or over a distributed file system. For
local file systems on spinning disks, and/or older machines, the
parallelism does not always bring a good performance. For this reason,
the default value for checkout.workers is one, a.k.a. sequential
checkout.
To decide the default value for checkout.thresholdForParallelism,
another benchmark was executed in the "Local SSD" setup, where parallel
checkout showed to be beneficial. This time, we compared the runtime of
a `git checkout -f`, with and without parallelism, after randomly
removing an increasing number of files from the Linux working tree. The
"sequential fallback" column below corresponds to the executions where
checkout.workers was 10 but checkout.thresholdForParallelism was equal
to the number of to-be-updated files plus one (so that we end up writing
sequentially). Each test case was sampled 15 times, and each sample had
a randomly different set of files removed. Here are the results:
sequential fallback 10 workers speedup
10 files 772.3 ms ± 12.6 ms 769.0 ms ± 13.6 ms 1.00 ± 0.02
20 files 780.5 ms ± 15.8 ms 775.2 ms ± 9.2 ms 1.01 ± 0.02
50 files 806.2 ms ± 13.8 ms 767.4 ms ± 8.5 ms 1.05 ± 0.02
100 files 833.7 ms ± 21.4 ms 750.5 ms ± 16.8 ms 1.11 ± 0.04
200 files 897.6 ms ± 30.9 ms 730.5 ms ± 14.7 ms 1.23 ± 0.05
500 files 1035.4 ms ± 48.0 ms 677.1 ms ± 22.3 ms 1.53 ± 0.09
1000 files 1244.6 ms ± 35.6 ms 654.0 ms ± 38.3 ms 1.90 ± 0.12
2000 files 1488.8 ms ± 53.4 ms 658.8 ms ± 23.8 ms 2.26 ± 0.12
From the above numbers, 100 files seems to be a reasonable default value
for the threshold setting.
Note: Up to 1000 files, we observe a drop in the execution time of the
parallel code with an increase in the number of files. This is a rather
odd behavior, but it was observed in multiple repetitions. Above 1000
files, the execution time increases according to the number of files, as
one would expect.
About the test environments: Local SSD tests were executed on an
i7-7700HQ (4 cores with hyper-threading) running Manjaro Linux. Local
HDD tests were executed on an Intel(R) Xeon(R) E3-1230 (also 4 cores
with hyper-threading), HDD Seagate Barracuda 7200.14 SATA 3.1, running
Debian. NFS and EFS tests were executed on an Amazon EC2 c5n.xlarge
instance, with 4 vCPUs. The Linux NFS server was running on a m6g.large
instance with 2 vCPUSs and a 1 TB EBS GP2 volume. Before each timing,
the linux repository was removed (or checked out back to its previous
state), and `sync && sysctl vm.drop_caches=3` was executed.
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 03:14:55 +03:00
|
|
|
get_parallel_checkout_configs(&pc_workers, &pc_threshold);
|
|
|
|
|
unpack-trees: add basic support for parallel checkout
This new interface allows us to enqueue some of the entries being
checked out to later uncompress them, apply in-process filters, and
write out the files in parallel. For now, the parallel checkout
machinery is enabled by default and there is no user configuration, but
run_parallel_checkout() just writes the queued entries in sequence
(without spawning additional workers). The next patch will actually
implement the parallelism and, later, we will make it configurable.
Note that, to avoid potential data races, not all entries are eligible
for parallel checkout. Also, paths that collide on disk (e.g.
case-sensitive paths in case-insensitive file systems), are detected by
the parallel checkout code and skipped, so that they can be safely
sequentially handled later. The collision detection works like the
following:
- If the collision was at basename (e.g. 'a/b' and 'a/B'), the framework
detects it by looking for EEXIST and EISDIR errors after an
open(O_CREAT | O_EXCL) failure.
- If the collision was at dirname (e.g. 'a/b' and 'A'), it is detected
at the has_dirs_only_path() check, which is done for the leading path
of each item in the parallel checkout queue.
Both verifications rely on the fact that, before enqueueing an entry for
parallel checkout, checkout_entry() makes sure that there is no file at
the entry's path and that its leading components are all real
directories. So, any later change in these conditions indicates that
there was a collision (either between two parallel-eligible entries or
between an eligible and an ineligible one).
After all parallel-eligible entries have been processed, the collided
(and thus, skipped) entries are sequentially fed to checkout_entry()
again. This is similar to the way the current code deals with
collisions, overwriting the previously checked out entries with the
subsequent ones. The only difference is that, since we no longer create
the files in the same order that they appear on index, we are not able
to determine which of the colliding entries will survive on disk (for
the classic code, it is always the last entry).
Co-authored-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 03:14:53 +03:00
|
|
|
enable_delayed_checkout(&state);
|
parallel-checkout: add configuration options
Make parallel checkout configurable by introducing two new settings:
checkout.workers and checkout.thresholdForParallelism. The first defines
the number of workers (where one means sequential checkout), and the
second defines the minimum number of entries to attempt parallel
checkout.
To decide the default value for checkout.workers, the parallel version
was benchmarked during three operations in the linux repo, with cold
cache: cloning v5.8, checking out v5.8 from v2.6.15 (checkout I) and
checking out v5.8 from v5.7 (checkout II). The four tables below show
the mean run times and standard deviations for 5 runs in: a local file
system on SSD, a local file system on HDD, a Linux NFS server, and
Amazon EFS (all on Linux). Each parallel checkout test was executed with
the number of workers that brings the best overall results in that
environment.
Local SSD:
Sequential 10 workers Speedup
Clone 8.805 s ± 0.043 s 3.564 s ± 0.041 s 2.47 ± 0.03
Checkout I 9.678 s ± 0.057 s 4.486 s ± 0.050 s 2.16 ± 0.03
Checkout II 5.034 s ± 0.072 s 3.021 s ± 0.038 s 1.67 ± 0.03
Local HDD:
Sequential 10 workers Speedup
Clone 32.288 s ± 0.580 s 30.724 s ± 0.522 s 1.05 ± 0.03
Checkout I 54.172 s ± 7.119 s 54.429 s ± 6.738 s 1.00 ± 0.18
Checkout II 40.465 s ± 2.402 s 38.682 s ± 1.365 s 1.05 ± 0.07
Linux NFS server (v4.1, on EBS, single availability zone):
Sequential 32 workers Speedup
Clone 240.368 s ± 6.347 s 57.349 s ± 0.870 s 4.19 ± 0.13
Checkout I 242.862 s ± 2.215 s 58.700 s ± 0.904 s 4.14 ± 0.07
Checkout II 65.751 s ± 1.577 s 23.820 s ± 0.407 s 2.76 ± 0.08
EFS (v4.1, replicated over multiple availability zones):
Sequential 32 workers Speedup
Clone 922.321 s ± 2.274 s 210.453 s ± 3.412 s 4.38 ± 0.07
Checkout I 1011.300 s ± 7.346 s 297.828 s ± 0.964 s 3.40 ± 0.03
Checkout II 294.104 s ± 1.836 s 126.017 s ± 1.190 s 2.33 ± 0.03
The above benchmarks show that parallel checkout is most effective on
repositories located on an SSD or over a distributed file system. For
local file systems on spinning disks, and/or older machines, the
parallelism does not always bring a good performance. For this reason,
the default value for checkout.workers is one, a.k.a. sequential
checkout.
To decide the default value for checkout.thresholdForParallelism,
another benchmark was executed in the "Local SSD" setup, where parallel
checkout showed to be beneficial. This time, we compared the runtime of
a `git checkout -f`, with and without parallelism, after randomly
removing an increasing number of files from the Linux working tree. The
"sequential fallback" column below corresponds to the executions where
checkout.workers was 10 but checkout.thresholdForParallelism was equal
to the number of to-be-updated files plus one (so that we end up writing
sequentially). Each test case was sampled 15 times, and each sample had
a randomly different set of files removed. Here are the results:
sequential fallback 10 workers speedup
10 files 772.3 ms ± 12.6 ms 769.0 ms ± 13.6 ms 1.00 ± 0.02
20 files 780.5 ms ± 15.8 ms 775.2 ms ± 9.2 ms 1.01 ± 0.02
50 files 806.2 ms ± 13.8 ms 767.4 ms ± 8.5 ms 1.05 ± 0.02
100 files 833.7 ms ± 21.4 ms 750.5 ms ± 16.8 ms 1.11 ± 0.04
200 files 897.6 ms ± 30.9 ms 730.5 ms ± 14.7 ms 1.23 ± 0.05
500 files 1035.4 ms ± 48.0 ms 677.1 ms ± 22.3 ms 1.53 ± 0.09
1000 files 1244.6 ms ± 35.6 ms 654.0 ms ± 38.3 ms 1.90 ± 0.12
2000 files 1488.8 ms ± 53.4 ms 658.8 ms ± 23.8 ms 2.26 ± 0.12
From the above numbers, 100 files seems to be a reasonable default value
for the threshold setting.
Note: Up to 1000 files, we observe a drop in the execution time of the
parallel code with an increase in the number of files. This is a rather
odd behavior, but it was observed in multiple repetitions. Above 1000
files, the execution time increases according to the number of files, as
one would expect.
About the test environments: Local SSD tests were executed on an
i7-7700HQ (4 cores with hyper-threading) running Manjaro Linux. Local
HDD tests were executed on an Intel(R) Xeon(R) E3-1230 (also 4 cores
with hyper-threading), HDD Seagate Barracuda 7200.14 SATA 3.1, running
Debian. NFS and EFS tests were executed on an Amazon EC2 c5n.xlarge
instance, with 4 vCPUs. The Linux NFS server was running on a m6g.large
instance with 2 vCPUSs and a 1 TB EBS GP2 volume. Before each timing,
the linux repository was removed (or checked out back to its previous
state), and `sync && sysctl vm.drop_caches=3` was executed.
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 03:14:55 +03:00
|
|
|
if (pc_workers > 1)
|
|
|
|
init_parallel_checkout();
|
2008-03-22 19:48:41 +03:00
|
|
|
for (i = 0; i < index->cache_nr; i++) {
|
|
|
|
struct cache_entry *ce = index->cache[i];
|
|
|
|
|
2008-01-15 03:03:17 +03:00
|
|
|
if (ce->ce_flags & CE_UPDATE) {
|
2021-04-19 03:14:56 +03:00
|
|
|
size_t last_pc_queue_size = pc_queue_size();
|
|
|
|
|
2015-07-18 00:19:27 +03:00
|
|
|
if (ce->ce_flags & CE_WT_REMOVE)
|
2018-05-02 12:38:39 +03:00
|
|
|
BUG("both update and delete flags are set on %s",
|
2015-07-18 00:19:27 +03:00
|
|
|
ce->name);
|
2008-01-15 03:03:17 +03:00
|
|
|
ce->ce_flags &= ~CE_UPDATE;
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
errs |= checkout_entry(ce, &state, NULL, NULL);
|
2021-04-19 03:14:56 +03:00
|
|
|
|
|
|
|
if (last_pc_queue_size == pc_queue_size())
|
|
|
|
display_progress(progress, ++cnt);
|
2006-07-30 22:25:18 +04:00
|
|
|
}
|
|
|
|
}
|
parallel-checkout: add configuration options
Make parallel checkout configurable by introducing two new settings:
checkout.workers and checkout.thresholdForParallelism. The first defines
the number of workers (where one means sequential checkout), and the
second defines the minimum number of entries to attempt parallel
checkout.
To decide the default value for checkout.workers, the parallel version
was benchmarked during three operations in the linux repo, with cold
cache: cloning v5.8, checking out v5.8 from v2.6.15 (checkout I) and
checking out v5.8 from v5.7 (checkout II). The four tables below show
the mean run times and standard deviations for 5 runs in: a local file
system on SSD, a local file system on HDD, a Linux NFS server, and
Amazon EFS (all on Linux). Each parallel checkout test was executed with
the number of workers that brings the best overall results in that
environment.
Local SSD:
Sequential 10 workers Speedup
Clone 8.805 s ± 0.043 s 3.564 s ± 0.041 s 2.47 ± 0.03
Checkout I 9.678 s ± 0.057 s 4.486 s ± 0.050 s 2.16 ± 0.03
Checkout II 5.034 s ± 0.072 s 3.021 s ± 0.038 s 1.67 ± 0.03
Local HDD:
Sequential 10 workers Speedup
Clone 32.288 s ± 0.580 s 30.724 s ± 0.522 s 1.05 ± 0.03
Checkout I 54.172 s ± 7.119 s 54.429 s ± 6.738 s 1.00 ± 0.18
Checkout II 40.465 s ± 2.402 s 38.682 s ± 1.365 s 1.05 ± 0.07
Linux NFS server (v4.1, on EBS, single availability zone):
Sequential 32 workers Speedup
Clone 240.368 s ± 6.347 s 57.349 s ± 0.870 s 4.19 ± 0.13
Checkout I 242.862 s ± 2.215 s 58.700 s ± 0.904 s 4.14 ± 0.07
Checkout II 65.751 s ± 1.577 s 23.820 s ± 0.407 s 2.76 ± 0.08
EFS (v4.1, replicated over multiple availability zones):
Sequential 32 workers Speedup
Clone 922.321 s ± 2.274 s 210.453 s ± 3.412 s 4.38 ± 0.07
Checkout I 1011.300 s ± 7.346 s 297.828 s ± 0.964 s 3.40 ± 0.03
Checkout II 294.104 s ± 1.836 s 126.017 s ± 1.190 s 2.33 ± 0.03
The above benchmarks show that parallel checkout is most effective on
repositories located on an SSD or over a distributed file system. For
local file systems on spinning disks, and/or older machines, the
parallelism does not always bring a good performance. For this reason,
the default value for checkout.workers is one, a.k.a. sequential
checkout.
To decide the default value for checkout.thresholdForParallelism,
another benchmark was executed in the "Local SSD" setup, where parallel
checkout showed to be beneficial. This time, we compared the runtime of
a `git checkout -f`, with and without parallelism, after randomly
removing an increasing number of files from the Linux working tree. The
"sequential fallback" column below corresponds to the executions where
checkout.workers was 10 but checkout.thresholdForParallelism was equal
to the number of to-be-updated files plus one (so that we end up writing
sequentially). Each test case was sampled 15 times, and each sample had
a randomly different set of files removed. Here are the results:
sequential fallback 10 workers speedup
10 files 772.3 ms ± 12.6 ms 769.0 ms ± 13.6 ms 1.00 ± 0.02
20 files 780.5 ms ± 15.8 ms 775.2 ms ± 9.2 ms 1.01 ± 0.02
50 files 806.2 ms ± 13.8 ms 767.4 ms ± 8.5 ms 1.05 ± 0.02
100 files 833.7 ms ± 21.4 ms 750.5 ms ± 16.8 ms 1.11 ± 0.04
200 files 897.6 ms ± 30.9 ms 730.5 ms ± 14.7 ms 1.23 ± 0.05
500 files 1035.4 ms ± 48.0 ms 677.1 ms ± 22.3 ms 1.53 ± 0.09
1000 files 1244.6 ms ± 35.6 ms 654.0 ms ± 38.3 ms 1.90 ± 0.12
2000 files 1488.8 ms ± 53.4 ms 658.8 ms ± 23.8 ms 2.26 ± 0.12
From the above numbers, 100 files seems to be a reasonable default value
for the threshold setting.
Note: Up to 1000 files, we observe a drop in the execution time of the
parallel code with an increase in the number of files. This is a rather
odd behavior, but it was observed in multiple repetitions. Above 1000
files, the execution time increases according to the number of files, as
one would expect.
About the test environments: Local SSD tests were executed on an
i7-7700HQ (4 cores with hyper-threading) running Manjaro Linux. Local
HDD tests were executed on an Intel(R) Xeon(R) E3-1230 (also 4 cores
with hyper-threading), HDD Seagate Barracuda 7200.14 SATA 3.1, running
Debian. NFS and EFS tests were executed on an Amazon EC2 c5n.xlarge
instance, with 4 vCPUs. The Linux NFS server was running on a m6g.large
instance with 2 vCPUSs and a 1 TB EBS GP2 volume. Before each timing,
the linux repository was removed (or checked out back to its previous
state), and `sync && sysctl vm.drop_caches=3` was executed.
Co-authored-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-19 03:14:55 +03:00
|
|
|
if (pc_workers > 1)
|
2021-04-19 03:14:56 +03:00
|
|
|
errs |= run_parallel_checkout(&state, pc_workers, pc_threshold,
|
|
|
|
progress, &cnt);
|
2007-10-30 21:57:33 +03:00
|
|
|
stop_progress(&progress);
|
2018-11-13 21:28:00 +03:00
|
|
|
errs |= finish_delayed_checkout(&state, NULL);
|
unpack-trees: exit check_updates() early if updates are not wanted
check_updates() has a lot of code that repeatedly checks whether
o->update or o->dry_run are set. (Note that o->dry_run is a
near-synonym for !o->update, but not quite as per commit 2c9078d05bf2
("unpack-trees: add the dry_run flag to unpack_trees_options",
2011-05-25).) In fact, this function almost turns into a no-op whenever
the condition
!o->update || o->dry_run
is met. Simplify the code by checking this condition at the beginning
of the function, and when it is true, do the few things that are
relevant and return early.
There are a few things that make the conversion not quite obvious:
* The fact that check_updates() does not actually turn into a no-op
when updates are not wanted may be slightly surprising. However,
commit 33ecf7eb61 (Discard "deleted" cache entries after using them
to update the working tree, 2008-02-07) put the discarding of
unused cache entries in check_updates() so we still need to keep
the call to remove_marked_cache_entries(). It's possible this
call belongs in another function, but it is certainly needed as
tests will fail if it is removed.
* The original called remove_scheduled_dirs() unconditionally.
Technically, commit 7847892716 (unlink_entry(): introduce
schedule_dir_for_removal(), 2009-02-09) should have made that call
conditional, but it didn't matter in practice because
remove_scheduled_dirs() becomes a no-op when all the calls to
unlink_entry() are skipped. As such, we do not need to call it.
* When (o->dry_run && o->update), the original would have two calls
to git_attr_set_direction() surrounding a bunch of skipped updates.
These two calls to git_attr_set_direction() cancel each other out
and thus can be omitted when o->dry_run is true just as they
already are when !o->update.
* The code would previously call setup_collided_checkout_detection()
and report_collided_checkout() even when o->dry_run. However, this
was just an expensive no-op because
setup_collided_checkout_detection() merely cleared the CE_MATCHED
flag for each cache entry, and report_collided_checkout() reported
which ones had it set. Since a dry-run would skip all the
checkout_entry() calls, CE_MATCHED would never get set and thus
no collisions would be reported. Since we can't detect the
collisions anyway without doing updates, skipping the collisions
detection setup and reporting is an optimization.
* The code previously would call get_progress() and
display_progress() even when (!o->update || o->dry_run). This
served to show how long it took to skip all the updates, which is
somewhat useless. Since we are skipping the updates, we can skip
showing how long it takes to skip them.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-07 09:57:57 +03:00
|
|
|
git_attr_set_direction(GIT_ATTR_CHECKIN);
|
2018-08-17 21:00:39 +03:00
|
|
|
|
|
|
|
if (o->clone)
|
|
|
|
report_collided_checkout(index);
|
|
|
|
|
2018-08-18 17:41:23 +03:00
|
|
|
trace_performance_leave("check_updates");
|
2008-03-19 08:01:28 +03:00
|
|
|
return errs != 0;
|
2006-07-30 22:25:18 +04:00
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_uptodate_sparse(const struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o);
|
|
|
|
static int verify_absent_sparse(const struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types,
|
|
|
|
struct unpack_trees_options *o);
|
2009-08-20 17:47:09 +04:00
|
|
|
|
2014-06-13 16:19:30 +04:00
|
|
|
static int apply_sparse_checkout(struct index_state *istate,
|
|
|
|
struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2009-08-20 17:47:09 +04:00
|
|
|
{
|
|
|
|
int was_skip_worktree = ce_skip_worktree(ce);
|
|
|
|
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
if (ce->ce_flags & CE_NEW_SKIP_WORKTREE)
|
2009-08-20 17:47:09 +04:00
|
|
|
ce->ce_flags |= CE_SKIP_WORKTREE;
|
|
|
|
else
|
|
|
|
ce->ce_flags &= ~CE_SKIP_WORKTREE;
|
2014-06-13 16:19:39 +04:00
|
|
|
if (was_skip_worktree != ce_skip_worktree(ce)) {
|
|
|
|
ce->ce_flags |= CE_UPDATE_IN_BASE;
|
2017-09-22 19:35:40 +03:00
|
|
|
mark_fsmonitor_invalid(istate, ce);
|
2014-06-13 16:19:30 +04:00
|
|
|
istate->cache_changed |= CE_ENTRY_CHANGED;
|
2014-06-13 16:19:39 +04:00
|
|
|
}
|
2009-08-20 17:47:09 +04:00
|
|
|
|
|
|
|
/*
|
2010-07-31 10:14:26 +04:00
|
|
|
* if (!was_skip_worktree && !ce_skip_worktree()) {
|
|
|
|
* This is perfectly normal. Move on;
|
|
|
|
* }
|
2009-08-20 17:47:09 +04:00
|
|
|
*/
|
2010-07-31 10:14:26 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Merge strategies may set CE_UPDATE|CE_REMOVE outside checkout
|
|
|
|
* area as a result of ce_skip_worktree() shortcuts in
|
2010-07-31 10:14:27 +04:00
|
|
|
* verify_absent() and verify_uptodate().
|
|
|
|
* Make sure they don't modify worktree if they are already
|
|
|
|
* outside checkout area
|
2010-07-31 10:14:26 +04:00
|
|
|
*/
|
2010-07-31 10:14:27 +04:00
|
|
|
if (was_skip_worktree && ce_skip_worktree(ce)) {
|
|
|
|
ce->ce_flags &= ~CE_UPDATE;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* By default, when CE_REMOVE is on, CE_WT_REMOVE is also
|
|
|
|
* on to get that file removed from both index and worktree.
|
|
|
|
* If that file is already outside worktree area, don't
|
|
|
|
* bother remove it.
|
|
|
|
*/
|
|
|
|
if (ce->ce_flags & CE_REMOVE)
|
|
|
|
ce->ce_flags &= ~CE_WT_REMOVE;
|
|
|
|
}
|
2009-08-20 17:47:09 +04:00
|
|
|
|
|
|
|
if (!was_skip_worktree && ce_skip_worktree(ce)) {
|
|
|
|
/*
|
|
|
|
* If CE_UPDATE is set, verify_uptodate() must be called already
|
|
|
|
* also stat info may have lost after merged_entry() so calling
|
|
|
|
* verify_uptodate() again may fail
|
|
|
|
*/
|
2020-03-27 03:48:50 +03:00
|
|
|
if (!(ce->ce_flags & CE_UPDATE) &&
|
|
|
|
verify_uptodate_sparse(ce, o)) {
|
|
|
|
ce->ce_flags &= ~CE_SKIP_WORKTREE;
|
2009-08-20 17:47:09 +04:00
|
|
|
return -1;
|
2020-03-27 03:48:50 +03:00
|
|
|
}
|
2009-08-20 17:47:09 +04:00
|
|
|
ce->ce_flags |= CE_WT_REMOVE;
|
2015-07-18 00:19:27 +03:00
|
|
|
ce->ce_flags &= ~CE_UPDATE;
|
2009-08-20 17:47:09 +04:00
|
|
|
}
|
|
|
|
if (was_skip_worktree && !ce_skip_worktree(ce)) {
|
2020-03-27 03:48:56 +03:00
|
|
|
if (verify_absent_sparse(ce, WARNING_SPARSE_ORPHANED_NOT_OVERWRITTEN, o))
|
2009-08-20 17:47:09 +04:00
|
|
|
return -1;
|
|
|
|
ce->ce_flags |= CE_UPDATE;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-27 03:48:59 +03:00
|
|
|
static int warn_conflicted_path(struct index_state *istate,
|
|
|
|
int i,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
char *conflicting_path = istate->cache[i]->name;
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
add_rejected_path(o, WARNING_SPARSE_UNMERGED_FILE, conflicting_path);
|
|
|
|
|
2020-05-07 16:17:33 +03:00
|
|
|
/* Find out how many higher stage entries are at same path */
|
|
|
|
while ((++count) + i < istate->cache_nr &&
|
|
|
|
!strcmp(conflicting_path, istate->cache[count + i]->name))
|
|
|
|
; /* do nothing */
|
|
|
|
|
2020-03-27 03:48:59 +03:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:56 +04:00
|
|
|
static inline int call_unpack_fn(const struct cache_entry * const *src,
|
|
|
|
struct unpack_trees_options *o)
|
2008-03-06 07:15:44 +03:00
|
|
|
{
|
2008-03-07 05:12:28 +03:00
|
|
|
int ret = o->fn(src, o);
|
|
|
|
if (ret > 0)
|
2008-03-06 07:15:44 +03:00
|
|
|
ret = 0;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
ce->ce_flags |= CE_UNPACKED;
|
|
|
|
|
|
|
|
if (o->cache_bottom < o->src_index->cache_nr &&
|
|
|
|
o->src_index->cache[o->cache_bottom] == ce) {
|
|
|
|
int bottom = o->cache_bottom;
|
|
|
|
while (bottom < o->src_index->cache_nr &&
|
|
|
|
o->src_index->cache[bottom]->ce_flags & CE_UNPACKED)
|
|
|
|
bottom++;
|
|
|
|
o->cache_bottom = bottom;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mark_all_ce_unused(struct index_state *index)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < index->cache_nr; i++)
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
index->cache[i]->ce_flags &= ~(CE_UNPACKED | CE_ADDED | CE_NEW_SKIP_WORKTREE);
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int locate_in_src_index(const struct cache_entry *ce,
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int len = ce_namelen(ce);
|
|
|
|
int pos = index_name_pos(index, ce->name, len);
|
|
|
|
if (pos < 0)
|
|
|
|
pos = -1 - pos;
|
|
|
|
return pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We call unpack_index_entry() with an unmerged cache entry
|
|
|
|
* only in diff-index, and it wants a single callback. Skip
|
|
|
|
* the other unmerged entry with the same name.
|
|
|
|
*/
|
|
|
|
static void mark_ce_used_same_name(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int len = ce_namelen(ce);
|
|
|
|
int pos;
|
|
|
|
|
|
|
|
for (pos = locate_in_src_index(ce, o); pos < index->cache_nr; pos++) {
|
|
|
|
struct cache_entry *next = index->cache[pos];
|
|
|
|
if (len != ce_namelen(next) ||
|
|
|
|
memcmp(ce->name, next->name, len))
|
|
|
|
break;
|
|
|
|
mark_ce_used(next, o);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cache_entry *next_cache_entry(struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
const struct index_state *index = o->src_index;
|
|
|
|
int pos = o->cache_bottom;
|
|
|
|
|
|
|
|
while (pos < index->cache_nr) {
|
|
|
|
struct cache_entry *ce = index->cache[pos];
|
|
|
|
if (!(ce->ce_flags & CE_UNPACKED))
|
|
|
|
return ce;
|
|
|
|
pos++;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valueable than the summary above. But if
anyone wants to re-identify the above sites. Applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 19:29:00 +04:00
|
|
|
static void add_same_unmerged(const struct cache_entry *ce,
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int len = ce_namelen(ce);
|
|
|
|
int pos = index_name_pos(index, ce->name, len);
|
|
|
|
|
|
|
|
if (0 <= pos)
|
|
|
|
die("programming error in a caller of mark_ce_used_same_name");
|
|
|
|
for (pos = -pos - 1; pos < index->cache_nr; pos++) {
|
|
|
|
struct cache_entry *next = index->cache[pos];
|
|
|
|
if (len != ce_namelen(next) ||
|
|
|
|
memcmp(ce->name, next->name, len))
|
|
|
|
break;
|
|
|
|
add_entry(o, next, 0, 0);
|
|
|
|
mark_ce_used(next, o);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int unpack_index_entry(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2008-03-06 07:15:44 +03:00
|
|
|
{
|
2013-06-02 19:46:56 +04:00
|
|
|
const struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
int ret;
|
2008-03-07 05:12:28 +03:00
|
|
|
|
2010-05-14 13:31:33 +04:00
|
|
|
src[0] = ce;
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
mark_ce_used(ce, o);
|
2008-03-06 07:15:44 +03:00
|
|
|
if (ce_stage(ce)) {
|
|
|
|
if (o->skip_unmerged) {
|
2008-03-07 05:12:28 +03:00
|
|
|
add_entry(o, ce, 0, 0);
|
|
|
|
return 0;
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
}
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
ret = call_unpack_fn(src, o);
|
|
|
|
if (ce_stage(ce))
|
|
|
|
mark_ce_used_same_name(ce, o);
|
|
|
|
return ret;
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
|
2019-07-31 07:38:15 +03:00
|
|
|
static int find_cache_pos(struct traverse_info *, const char *p, size_t len);
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
|
|
|
|
static void restore_cache_bottom(struct traverse_info *info, int bottom)
|
|
|
|
{
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
|
|
|
|
if (o->diff_index_cached)
|
|
|
|
return;
|
|
|
|
o->cache_bottom = bottom;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int switch_cache_bottom(struct traverse_info *info)
|
|
|
|
{
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
int ret, pos;
|
|
|
|
|
|
|
|
if (o->diff_index_cached)
|
|
|
|
return 0;
|
|
|
|
ret = o->cache_bottom;
|
2019-07-31 07:38:15 +03:00
|
|
|
pos = find_cache_pos(info->prev, info->name, info->namelen);
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
|
|
|
|
if (pos < -1)
|
|
|
|
o->cache_bottom = -2 - pos;
|
|
|
|
else if (pos < 0)
|
|
|
|
o->cache_bottom = o->src_index->cache_nr;
|
|
|
|
return ret;
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
|
2017-04-14 22:25:54 +03:00
|
|
|
static inline int are_same_oid(struct name_entry *name_j, struct name_entry *name_k)
|
|
|
|
{
|
2019-01-15 03:39:44 +03:00
|
|
|
return !is_null_oid(&name_j->oid) && !is_null_oid(&name_k->oid) && oideq(&name_j->oid, &name_k->oid);
|
2017-04-14 22:25:54 +03:00
|
|
|
}
|
|
|
|
|
2018-08-18 17:41:24 +03:00
|
|
|
static int all_trees_same_as_cache_tree(int n, unsigned long dirmask,
|
|
|
|
struct name_entry *names,
|
|
|
|
struct traverse_info *info)
|
|
|
|
{
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!o->merge || dirmask != ((1 << n) - 1))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
for (i = 1; i < n; i++)
|
|
|
|
if (!are_same_oid(names, names + i))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return cache_tree_matches_traversal(o->src_index->cache_tree, names, info);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int index_pos_by_traverse_info(struct name_entry *names,
|
|
|
|
struct traverse_info *info)
|
|
|
|
{
|
|
|
|
struct unpack_trees_options *o = info->data;
|
2019-07-31 07:38:23 +03:00
|
|
|
struct strbuf name = STRBUF_INIT;
|
2018-08-18 17:41:24 +03:00
|
|
|
int pos;
|
|
|
|
|
2019-07-31 07:38:23 +03:00
|
|
|
strbuf_make_traverse_path(&name, info, names->path, names->pathlen);
|
|
|
|
strbuf_addch(&name, '/');
|
|
|
|
pos = index_name_pos(o->src_index, name.buf, name.len);
|
2021-03-30 16:10:57 +03:00
|
|
|
if (pos >= 0) {
|
|
|
|
if (!o->src_index->sparse_index ||
|
|
|
|
!(o->src_index->cache[pos]->ce_flags & CE_SKIP_WORKTREE))
|
|
|
|
BUG("This is a directory and should not exist in index");
|
|
|
|
} else {
|
|
|
|
pos = -pos - 1;
|
|
|
|
}
|
unpack-trees: watch for out-of-range index position
It's possible in a case where the index file contains a tree extension
but no blobs within that tree exist for index_pos_by_traverse_info() to
segfault. If the name_entry passed into index_pos_by_traverse_info() has
no blobs inside, AND is alphabetically later than all blobs currently in
the index file, index_pos_by_traverse_info() will segfault. For example,
an index file which looks something like this:
aaa#0
bbb/aaa#0
[Extensions]
TREE: zzz
In this example, 'index_name_pos(..., "zzz/", ...)' will return '-4',
indicating that "zzz/" could be inserted at position 3. However, when
the checks which ensure that the insertion position of "zzz/" look for a
blob at that position beginning with "zzz/", the index cache is accessed
out of range, causing a segfault.
This kind of index state is not typically generated during user
operations, and is in fact an edge case of the state being checked for
in the conditional where it was added. However, since the entry for the
BUG() line is ambiguous, tell some additional context to help Git
developers debug the failure later. When we know the name of the dir we
were trying to look up, it becomes possible to examine the index file
in a hex util to determine what went wrong; the position gives a hint
about where to start looking.
Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-08 05:31:27 +03:00
|
|
|
if (pos >= o->src_index->cache_nr ||
|
|
|
|
!starts_with(o->src_index->cache[pos]->name, name.buf) ||
|
2019-07-31 07:38:23 +03:00
|
|
|
(pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name.buf)))
|
unpack-trees: watch for out-of-range index position
It's possible in a case where the index file contains a tree extension
but no blobs within that tree exist for index_pos_by_traverse_info() to
segfault. If the name_entry passed into index_pos_by_traverse_info() has
no blobs inside, AND is alphabetically later than all blobs currently in
the index file, index_pos_by_traverse_info() will segfault. For example,
an index file which looks something like this:
aaa#0
bbb/aaa#0
[Extensions]
TREE: zzz
In this example, 'index_name_pos(..., "zzz/", ...)' will return '-4',
indicating that "zzz/" could be inserted at position 3. However, when
the checks which ensure that the insertion position of "zzz/" look for a
blob at that position beginning with "zzz/", the index cache is accessed
out of range, causing a segfault.
This kind of index state is not typically generated during user
operations, and is in fact an edge case of the state being checked for
in the conditional where it was added. However, since the entry for the
BUG() line is ambiguous, tell some additional context to help Git
developers debug the failure later. When we know the name of the dir we
were trying to look up, it becomes possible to examine the index file
in a hex util to determine what went wrong; the position gives a hint
about where to start looking.
Signed-off-by: Emily Shaffer <emilyshaffer@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-08 05:31:27 +03:00
|
|
|
BUG("pos %d doesn't point to the first entry of %s in index",
|
|
|
|
pos, name.buf);
|
2019-07-31 07:38:23 +03:00
|
|
|
strbuf_release(&name);
|
2018-08-18 17:41:24 +03:00
|
|
|
return pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fast path if we detect that all trees are the same as cache-tree at this
|
2018-08-25 16:02:09 +03:00
|
|
|
* path. We'll walk these trees in an iterative loop using cache-tree/index
|
|
|
|
* instead of ODB since we already know what these trees contain.
|
2018-08-18 17:41:24 +03:00
|
|
|
*/
|
|
|
|
static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
|
|
|
|
struct traverse_info *info)
|
|
|
|
{
|
|
|
|
struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
|
|
|
|
struct unpack_trees_options *o = info->data;
|
2018-08-18 17:41:25 +03:00
|
|
|
struct cache_entry *tree_ce = NULL;
|
|
|
|
int ce_len = 0;
|
2018-08-18 17:41:24 +03:00
|
|
|
int i, d;
|
|
|
|
|
|
|
|
if (!o->merge)
|
|
|
|
BUG("We need cache-tree to do this optimization");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do what unpack_callback() and unpack_nondirectories() normally
|
|
|
|
* do. But we walk all paths in an iterative loop instead.
|
|
|
|
*
|
|
|
|
* D/F conflicts and higher stage entries are not a concern
|
|
|
|
* because cache-tree would be invalidated and we would never
|
|
|
|
* get here in the first place.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < nr_entries; i++) {
|
2018-08-18 17:41:25 +03:00
|
|
|
int new_ce_len, len, rc;
|
2018-08-18 17:41:24 +03:00
|
|
|
|
|
|
|
src[0] = o->src_index->cache[pos + i];
|
|
|
|
|
|
|
|
len = ce_namelen(src[0]);
|
2018-08-18 17:41:25 +03:00
|
|
|
new_ce_len = cache_entry_size(len);
|
|
|
|
|
|
|
|
if (new_ce_len > ce_len) {
|
|
|
|
new_ce_len <<= 1;
|
|
|
|
tree_ce = xrealloc(tree_ce, new_ce_len);
|
|
|
|
memset(tree_ce, 0, new_ce_len);
|
|
|
|
ce_len = new_ce_len;
|
|
|
|
|
|
|
|
tree_ce->ce_flags = create_ce_flags(0);
|
|
|
|
|
|
|
|
for (d = 1; d <= nr_names; d++)
|
|
|
|
src[d] = tree_ce;
|
|
|
|
}
|
2018-08-18 17:41:24 +03:00
|
|
|
|
|
|
|
tree_ce->ce_mode = src[0]->ce_mode;
|
|
|
|
tree_ce->ce_namelen = len;
|
|
|
|
oidcpy(&tree_ce->oid, &src[0]->oid);
|
|
|
|
memcpy(tree_ce->name, src[0]->name, len + 1);
|
|
|
|
|
|
|
|
rc = call_unpack_fn((const struct cache_entry * const *)src, o);
|
2018-08-18 17:41:25 +03:00
|
|
|
if (rc < 0) {
|
|
|
|
free(tree_ce);
|
2018-08-18 17:41:24 +03:00
|
|
|
return rc;
|
2018-08-18 17:41:25 +03:00
|
|
|
}
|
2018-08-18 17:41:24 +03:00
|
|
|
|
|
|
|
mark_ce_used(src[0], o);
|
|
|
|
}
|
2018-08-18 17:41:25 +03:00
|
|
|
free(tree_ce);
|
2018-08-18 17:41:24 +03:00
|
|
|
if (o->debug_unpack)
|
|
|
|
printf("Unpacked %d entries from %s to %s using cache-tree\n",
|
|
|
|
nr_entries,
|
|
|
|
o->src_index->cache[pos]->name,
|
|
|
|
o->src_index->cache[pos + nr_entries - 1]->name);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-12-22 20:09:55 +03:00
|
|
|
static int traverse_trees_recursive(int n, unsigned long dirmask,
|
|
|
|
unsigned long df_conflicts,
|
|
|
|
struct name_entry *names,
|
|
|
|
struct traverse_info *info)
|
2006-07-30 22:25:18 +04:00
|
|
|
{
|
2018-11-18 19:47:57 +03:00
|
|
|
struct unpack_trees_options *o = info->data;
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
int i, ret, bottom;
|
2017-04-14 22:25:54 +03:00
|
|
|
int nr_buf = 0;
|
2008-03-14 08:07:18 +03:00
|
|
|
struct tree_desc t[MAX_UNPACK_TREES];
|
2010-08-10 07:33:44 +04:00
|
|
|
void *buf[MAX_UNPACK_TREES];
|
2008-03-06 07:15:44 +03:00
|
|
|
struct traverse_info newinfo;
|
|
|
|
struct name_entry *p;
|
2018-08-18 17:41:24 +03:00
|
|
|
int nr_entries;
|
|
|
|
|
|
|
|
nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info);
|
|
|
|
if (nr_entries > 0) {
|
|
|
|
int pos = index_pos_by_traverse_info(names, info);
|
|
|
|
|
|
|
|
if (!o->merge || df_conflicts)
|
|
|
|
BUG("Wrong condition to get here buddy");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* All entries up to 'pos' must have been processed
|
|
|
|
* (i.e. marked CE_UNPACKED) at this point. But to be safe,
|
|
|
|
* save and restore cache_bottom anyway to not miss
|
|
|
|
* unprocessed entries before 'pos'.
|
|
|
|
*/
|
|
|
|
bottom = o->cache_bottom;
|
2019-03-20 11:15:07 +03:00
|
|
|
ret = traverse_by_cache_tree(pos, nr_entries, n, info);
|
2018-08-18 17:41:24 +03:00
|
|
|
o->cache_bottom = bottom;
|
|
|
|
return ret;
|
|
|
|
}
|
2008-03-06 07:15:44 +03:00
|
|
|
|
|
|
|
p = names;
|
|
|
|
while (!p->mode)
|
|
|
|
p++;
|
|
|
|
|
|
|
|
newinfo = *info;
|
|
|
|
newinfo.prev = info;
|
2011-08-29 23:31:06 +04:00
|
|
|
newinfo.pathspec = info->pathspec;
|
2019-07-31 07:38:15 +03:00
|
|
|
newinfo.name = p->path;
|
|
|
|
newinfo.namelen = p->pathlen;
|
|
|
|
newinfo.mode = p->mode;
|
2019-07-31 07:38:18 +03:00
|
|
|
newinfo.pathlen = st_add3(newinfo.pathlen, tree_entry_len(p), 1);
|
2013-06-16 03:44:43 +04:00
|
|
|
newinfo.df_conflicts |= df_conflicts;
|
2008-03-06 07:15:44 +03:00
|
|
|
|
2017-04-14 22:25:54 +03:00
|
|
|
/*
|
|
|
|
* Fetch the tree from the ODB for each peer directory in the
|
|
|
|
* n commits.
|
|
|
|
*
|
|
|
|
* For 2- and 3-way traversals, we try to avoid hitting the
|
|
|
|
* ODB twice for the same OID. This should yield a nice speed
|
|
|
|
* up in checkouts and merges when the commits are similar.
|
|
|
|
*
|
|
|
|
* We don't bother doing the full O(n^2) search for larger n,
|
|
|
|
* because wider traversals don't happen that often and we
|
|
|
|
* avoid the search setup.
|
|
|
|
*
|
|
|
|
* When 2 peer OIDs are the same, we just copy the tree
|
|
|
|
* descriptor data. This implicitly borrows the buffer
|
|
|
|
* data from the earlier cell.
|
|
|
|
*/
|
2008-03-06 07:15:44 +03:00
|
|
|
for (i = 0; i < n; i++, dirmask >>= 1) {
|
2017-04-14 22:25:54 +03:00
|
|
|
if (i > 0 && are_same_oid(&names[i], &names[i - 1]))
|
|
|
|
t[i] = t[i - 1];
|
|
|
|
else if (i > 1 && are_same_oid(&names[i], &names[i - 2]))
|
|
|
|
t[i] = t[i - 2];
|
|
|
|
else {
|
2017-08-12 11:32:59 +03:00
|
|
|
const struct object_id *oid = NULL;
|
2017-04-14 22:25:54 +03:00
|
|
|
if (dirmask & 1)
|
2019-01-15 03:39:44 +03:00
|
|
|
oid = &names[i].oid;
|
2019-06-27 12:28:48 +03:00
|
|
|
buf[nr_buf++] = fill_tree_descriptor(the_repository, t + i, oid);
|
2017-04-14 22:25:54 +03:00
|
|
|
}
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
|
|
|
|
bottom = switch_cache_bottom(&newinfo);
|
2018-11-18 19:47:57 +03:00
|
|
|
ret = traverse_trees(o->src_index, n, t, &newinfo);
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
restore_cache_bottom(&newinfo, bottom);
|
2010-08-10 07:33:44 +04:00
|
|
|
|
2017-04-14 22:25:54 +03:00
|
|
|
for (i = 0; i < nr_buf; i++)
|
2010-08-10 07:33:44 +04:00
|
|
|
free(buf[i]);
|
|
|
|
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
return ret;
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compare the traverse-path to the cache entry without actually
|
|
|
|
* having to generate the textual representation of the traverse
|
|
|
|
* path.
|
|
|
|
*
|
|
|
|
* NOTE! This *only* compares up to the size of the traverse path
|
|
|
|
* itself - the caller needs to do the final check for the cache
|
|
|
|
* entry having more data at the end!
|
|
|
|
*/
|
2019-07-31 07:38:15 +03:00
|
|
|
static int do_compare_entry_piecewise(const struct cache_entry *ce,
|
|
|
|
const struct traverse_info *info,
|
|
|
|
const char *name, size_t namelen,
|
|
|
|
unsigned mode)
|
2008-03-06 07:15:44 +03:00
|
|
|
{
|
2019-07-31 07:38:15 +03:00
|
|
|
int pathlen, ce_len;
|
2008-03-06 07:15:44 +03:00
|
|
|
const char *ce_name;
|
|
|
|
|
|
|
|
if (info->prev) {
|
2015-12-22 01:34:20 +03:00
|
|
|
int cmp = do_compare_entry_piecewise(ce, info->prev,
|
2019-07-31 07:38:15 +03:00
|
|
|
info->name, info->namelen,
|
|
|
|
info->mode);
|
2008-03-06 07:15:44 +03:00
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
}
|
|
|
|
pathlen = info->pathlen;
|
|
|
|
ce_len = ce_namelen(ce);
|
|
|
|
|
|
|
|
/* If ce_len < pathlen then we must have previously hit "name == directory" entry */
|
|
|
|
if (ce_len < pathlen)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
ce_len -= pathlen;
|
|
|
|
ce_name = ce->name + pathlen;
|
|
|
|
|
2019-07-31 07:38:15 +03:00
|
|
|
return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode);
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
|
2015-12-22 01:34:20 +03:00
|
|
|
static int do_compare_entry(const struct cache_entry *ce,
|
|
|
|
const struct traverse_info *info,
|
2019-07-31 07:38:15 +03:00
|
|
|
const char *name, size_t namelen,
|
|
|
|
unsigned mode)
|
2015-12-22 01:34:20 +03:00
|
|
|
{
|
2019-07-31 07:38:15 +03:00
|
|
|
int pathlen, ce_len;
|
2015-12-22 01:34:20 +03:00
|
|
|
const char *ce_name;
|
|
|
|
int cmp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have not precomputed the traverse path, it is quicker
|
|
|
|
* to avoid doing so. But if we have precomputed it,
|
|
|
|
* it is quicker to use the precomputed version.
|
|
|
|
*/
|
|
|
|
if (!info->traverse_path)
|
2019-07-31 07:38:15 +03:00
|
|
|
return do_compare_entry_piecewise(ce, info, name, namelen, mode);
|
2015-12-22 01:34:20 +03:00
|
|
|
|
|
|
|
cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
|
|
|
|
pathlen = info->pathlen;
|
|
|
|
ce_len = ce_namelen(ce);
|
|
|
|
|
|
|
|
if (ce_len < pathlen)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
ce_len -= pathlen;
|
|
|
|
ce_name = ce->name + pathlen;
|
|
|
|
|
2019-07-31 07:38:15 +03:00
|
|
|
return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode);
|
2015-12-22 01:34:20 +03:00
|
|
|
}
|
|
|
|
|
2008-03-06 07:15:44 +03:00
|
|
|
static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
|
|
|
|
{
|
2019-07-31 07:38:15 +03:00
|
|
|
int cmp = do_compare_entry(ce, info, n->path, n->pathlen, n->mode);
|
2008-03-06 07:15:44 +03:00
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Even if the beginning compared identically, the ce should
|
|
|
|
* compare as bigger than a directory leading up to it!
|
|
|
|
*/
|
2019-07-31 07:38:20 +03:00
|
|
|
return ce_namelen(ce) > traverse_path_len(info, tree_entry_len(n));
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
static int ce_in_traverse_path(const struct cache_entry *ce,
|
|
|
|
const struct traverse_info *info)
|
|
|
|
{
|
|
|
|
if (!info->prev)
|
|
|
|
return 1;
|
2019-07-31 07:38:15 +03:00
|
|
|
if (do_compare_entry(ce, info->prev,
|
|
|
|
info->name, info->namelen, info->mode))
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
return 0;
|
|
|
|
/*
|
|
|
|
* If ce (blob) is the same name as the path (which is a tree
|
|
|
|
* we will be descending into), it won't be inside it.
|
|
|
|
*/
|
|
|
|
return (info->pathlen < ce_namelen(ce));
|
|
|
|
}
|
|
|
|
|
2018-07-02 22:49:31 +03:00
|
|
|
static struct cache_entry *create_ce_entry(const struct traverse_info *info,
|
|
|
|
const struct name_entry *n,
|
|
|
|
int stage,
|
|
|
|
struct index_state *istate,
|
|
|
|
int is_transient)
|
2008-03-06 07:15:44 +03:00
|
|
|
{
|
2019-07-31 07:38:20 +03:00
|
|
|
size_t len = traverse_path_len(info, tree_entry_len(n));
|
2018-07-02 22:49:31 +03:00
|
|
|
struct cache_entry *ce =
|
|
|
|
is_transient ?
|
2021-05-04 19:27:28 +03:00
|
|
|
make_empty_transient_cache_entry(len, NULL) :
|
2018-07-02 22:49:31 +03:00
|
|
|
make_empty_cache_entry(istate, len);
|
2008-03-06 07:15:44 +03:00
|
|
|
|
|
|
|
ce->ce_mode = create_ce_mode(n->mode);
|
2012-07-11 13:22:37 +04:00
|
|
|
ce->ce_flags = create_ce_flags(stage);
|
|
|
|
ce->ce_namelen = len;
|
2019-01-15 03:39:44 +03:00
|
|
|
oidcpy(&ce->oid, &n->oid);
|
2019-07-31 07:38:25 +03:00
|
|
|
/* len+1 because the cache_entry allocates space for NUL */
|
|
|
|
make_traverse_path(ce->name, len + 1, info, n->path, n->pathlen);
|
2008-03-06 07:15:44 +03:00
|
|
|
|
|
|
|
return ce;
|
|
|
|
}
|
|
|
|
|
2018-08-18 17:41:24 +03:00
|
|
|
/*
|
|
|
|
* Note that traverse_by_cache_tree() duplicates some logic in this function
|
|
|
|
* without actually calling it. If you change the logic here you may need to
|
|
|
|
* check and change there as well.
|
|
|
|
*/
|
2009-01-31 17:39:10 +03:00
|
|
|
static int unpack_nondirectories(int n, unsigned long mask,
|
|
|
|
unsigned long dirmask,
|
|
|
|
struct cache_entry **src,
|
|
|
|
const struct name_entry *names,
|
|
|
|
const struct traverse_info *info)
|
2008-03-06 07:15:44 +03:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct unpack_trees_options *o = info->data;
|
2013-06-16 03:44:43 +04:00
|
|
|
unsigned long conflicts = info->df_conflicts | dirmask;
|
2008-03-06 07:15:44 +03:00
|
|
|
|
|
|
|
/* Do we have *only* directories? Nothing to do */
|
|
|
|
if (mask == dirmask && !src[0])
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ok, we've filled in up to any potential index entry in src[0],
|
|
|
|
* now do the rest.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
int stage;
|
|
|
|
unsigned int bit = 1ul << i;
|
|
|
|
if (conflicts & bit) {
|
|
|
|
src[i + o->merge] = o->df_conflict_entry;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!(mask & bit))
|
|
|
|
continue;
|
|
|
|
if (!o->merge)
|
|
|
|
stage = 0;
|
|
|
|
else if (i + 1 < o->head_idx)
|
|
|
|
stage = 1;
|
|
|
|
else if (i + 1 > o->head_idx)
|
|
|
|
stage = 3;
|
|
|
|
else
|
|
|
|
stage = 2;
|
2018-07-02 22:49:31 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the merge bit is set, then the cache entries are
|
|
|
|
* discarded in the following block. In this case,
|
|
|
|
* construct "transient" cache_entries, as they are
|
|
|
|
* not stored in the index. otherwise construct the
|
|
|
|
* cache entry from the index aware logic.
|
|
|
|
*/
|
|
|
|
src[i + o->merge] = create_ce_entry(info, names + i, stage, &o->result, o->merge);
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:57 +04:00
|
|
|
if (o->merge) {
|
|
|
|
int rc = call_unpack_fn((const struct cache_entry * const *)src,
|
|
|
|
o);
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
struct cache_entry *ce = src[i + o->merge];
|
|
|
|
if (ce != o->df_conflict_entry)
|
2018-07-02 22:49:31 +03:00
|
|
|
discard_cache_entry(ce);
|
2013-06-02 19:46:57 +04:00
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
2008-03-06 07:15:44 +03:00
|
|
|
|
|
|
|
for (i = 0; i < n; i++)
|
read-tree A B C: do not create a bogus index and do not segfault
"git read-tree A B C..." without the "-m" (merge) option is a way to read
these trees on top of each other to get an overlay of them.
An ancient commit ee6566e (Rewrite read-tree, 2005-09-05) passed the
ADD_CACHE_SKIP_DFCHECK flag when calling add_index_entry() to add the
paths obtained from these trees to the index, but it is an incorrect use
of the flag. The flag is meant to be used by callers who know the
addition of the entry does not introduce a D/F conflict to the index in
order to avoid the overhead of checking.
This bug resulted in a bogus index that records both "x" and "x/z" as a
blob after reading three trees that have paths ("x"), ("x", "y"), and
("x/z", "y") respectively. 34110cd (Make 'unpack_trees()' have a separate
source and destination index, 2008-03-06) refactored the callsites of
add_index_entry() incorrectly and added more codepaths that use this flag
when it shouldn't be used.
Also, 0190457 (Move 'unpack_trees()' over to 'traverse_trees()' interface,
2008-03-05) introduced a bug to call add_index_entry() for the tree that
does not have the path in it, passing NULL as a cache entry. This caused
reading multiple trees, one of which has path "x" but another doesn't, to
segfault.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-03-12 10:02:12 +03:00
|
|
|
if (src[i] && src[i] != o->df_conflict_entry)
|
2014-11-24 21:36:51 +03:00
|
|
|
if (do_add_entry(o, src[i], 0, 0))
|
|
|
|
return -1;
|
|
|
|
|
2008-03-06 07:15:44 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-20 03:36:45 +04:00
|
|
|
static int unpack_failed(struct unpack_trees_options *o, const char *message)
|
|
|
|
{
|
|
|
|
discard_index(&o->result);
|
2019-03-22 12:31:36 +03:00
|
|
|
if (!o->quiet && !o->exiting_early) {
|
2009-09-20 03:36:45 +04:00
|
|
|
if (message)
|
|
|
|
return error("%s", message);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
/*
|
|
|
|
* The tree traversal is looking at name p. If we have a matching entry,
|
|
|
|
* return it. If name p is a directory in the index, do not return
|
|
|
|
* anything, as we will want to match it when the traversal descends into
|
|
|
|
* the directory.
|
|
|
|
*/
|
|
|
|
static int find_cache_pos(struct traverse_info *info,
|
2019-07-31 07:38:15 +03:00
|
|
|
const char *p, size_t p_len)
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
{
|
|
|
|
int pos;
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int pfxlen = info->pathlen;
|
|
|
|
|
|
|
|
for (pos = o->cache_bottom; pos < index->cache_nr; pos++) {
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valueable than the summary above. But if
anyone wants to re-identify the above sites. Applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 19:29:00 +04:00
|
|
|
const struct cache_entry *ce = index->cache[pos];
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
const char *ce_name, *ce_slash;
|
|
|
|
int cmp, ce_len;
|
|
|
|
|
unpack-trees: Make index lookahead less pessimal
When traversing trees with an index, the current index pointer
(o->cache_bottom) occasionally has to be temporarily advanced forwards to
match the traversal order of the tree, which is not the same as the sort
order of the index. The existing algorithm that did this (introduced in
730f72840cc50c523fe4cdd796ea2d2fc4571a28) would get "stuck" when the
cache_bottom was popped and then repeatedly check the same index entries
over and over. This represents a serious performance regression for
large repositories compared to the old "broken" traversal order.
This commit makes a simple change to mitigate this. Whenever
find_cache_pos sees that the current pos is also the cache_bottom, and
it has already been unpacked, it advances the cache_bottom as well as
the current pos. This prevents the above "sticking" behavior without
dramatically changing the algorithm.
In addition, this commit moves the unpacked check above the
ce_in_traverse_path() check. The simple bitmask check is cheaper, and
in the case described above will be firing quite a bit to advance the
cache_bottom after a tree pop.
This yields considerable performance improvements for large trees.
The following are the number of function calls for "git diff HEAD" on
the Linux kernel tree, with 33,307 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 35,332 35,332
find_cache_pos 37,357 37,357
ce_in_traverse_path 4,979,473 37,357
do_compare_entry 6,828,181 251,925
df_name_compare 6,828,181 251,925
And on a repository of 187,456 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 197,958 197,958
find_cache_pos 208,460 208,460
ce_in_traverse_path 37,308,336 208,460
do_compare_entry 156,950,469 2,690,626
df_name_compare 156,950,469 2,690,626
On the latter repository, user time for "git diff HEAD" was reduced from
5.58 to 0.42 seconds. This is compared to 0.30 seconds before the
traversal order fix was implemented.
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-11 06:59:07 +04:00
|
|
|
if (ce->ce_flags & CE_UNPACKED) {
|
|
|
|
/*
|
|
|
|
* cache_bottom entry is already unpacked, so
|
|
|
|
* we can never match it; don't check it
|
|
|
|
* again.
|
|
|
|
*/
|
|
|
|
if (pos == o->cache_bottom)
|
|
|
|
++o->cache_bottom;
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
continue;
|
unpack-trees: Make index lookahead less pessimal
When traversing trees with an index, the current index pointer
(o->cache_bottom) occasionally has to be temporarily advanced forwards to
match the traversal order of the tree, which is not the same as the sort
order of the index. The existing algorithm that did this (introduced in
730f72840cc50c523fe4cdd796ea2d2fc4571a28) would get "stuck" when the
cache_bottom was popped and then repeatedly check the same index entries
over and over. This represents a serious performance regression for
large repositories compared to the old "broken" traversal order.
This commit makes a simple change to mitigate this. Whenever
find_cache_pos sees that the current pos is also the cache_bottom, and
it has already been unpacked, it advances the cache_bottom as well as
the current pos. This prevents the above "sticking" behavior without
dramatically changing the algorithm.
In addition, this commit moves the unpacked check above the
ce_in_traverse_path() check. The simple bitmask check is cheaper, and
in the case described above will be firing quite a bit to advance the
cache_bottom after a tree pop.
This yields considerable performance improvements for large trees.
The following are the number of function calls for "git diff HEAD" on
the Linux kernel tree, with 33,307 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 35,332 35,332
find_cache_pos 37,357 37,357
ce_in_traverse_path 4,979,473 37,357
do_compare_entry 6,828,181 251,925
df_name_compare 6,828,181 251,925
And on a repository of 187,456 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 197,958 197,958
find_cache_pos 208,460 208,460
ce_in_traverse_path 37,308,336 208,460
do_compare_entry 156,950,469 2,690,626
df_name_compare 156,950,469 2,690,626
On the latter repository, user time for "git diff HEAD" was reduced from
5.58 to 0.42 seconds. This is compared to 0.30 seconds before the
traversal order fix was implemented.
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-11 06:59:07 +04:00
|
|
|
}
|
2016-01-22 22:58:43 +03:00
|
|
|
if (!ce_in_traverse_path(ce, info)) {
|
|
|
|
/*
|
|
|
|
* Check if we can skip future cache checks
|
|
|
|
* (because we're already past all possible
|
|
|
|
* entries in the traverse path).
|
|
|
|
*/
|
|
|
|
if (info->traverse_path) {
|
|
|
|
if (strncmp(ce->name, info->traverse_path,
|
|
|
|
info->pathlen) > 0)
|
|
|
|
break;
|
|
|
|
}
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
continue;
|
2016-01-22 22:58:43 +03:00
|
|
|
}
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
ce_name = ce->name + pfxlen;
|
|
|
|
ce_slash = strchr(ce_name, '/');
|
|
|
|
if (ce_slash)
|
|
|
|
ce_len = ce_slash - ce_name;
|
|
|
|
else
|
|
|
|
ce_len = ce_namelen(ce) - pfxlen;
|
2019-07-31 07:38:15 +03:00
|
|
|
cmp = name_compare(p, p_len, ce_name, ce_len);
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
/*
|
|
|
|
* Exact match; if we have a directory we need to
|
|
|
|
* delay returning it.
|
|
|
|
*/
|
|
|
|
if (!cmp)
|
|
|
|
return ce_slash ? -2 - pos : pos;
|
|
|
|
if (0 < cmp)
|
|
|
|
continue; /* keep looking */
|
|
|
|
/*
|
|
|
|
* ce_name sorts after p->path; could it be that we
|
|
|
|
* have files under p->path directory in the index?
|
|
|
|
* E.g. ce_name == "t-i", and p->path == "t"; we may
|
|
|
|
* have "t/a" in the index.
|
|
|
|
*/
|
2019-07-31 07:38:15 +03:00
|
|
|
if (p_len < ce_len && !memcmp(ce_name, p, p_len) &&
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
ce_name[p_len] < '/')
|
|
|
|
continue; /* keep looking */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cache_entry *find_cache_entry(struct traverse_info *info,
|
|
|
|
const struct name_entry *p)
|
|
|
|
{
|
2019-07-31 07:38:15 +03:00
|
|
|
int pos = find_cache_pos(info, p->path, p->pathlen);
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
|
|
|
|
if (0 <= pos)
|
|
|
|
return o->src_index->cache[pos];
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2009-09-14 13:22:00 +04:00
|
|
|
static void debug_path(struct traverse_info *info)
|
|
|
|
{
|
|
|
|
if (info->prev) {
|
|
|
|
debug_path(info->prev);
|
2019-07-31 07:38:15 +03:00
|
|
|
if (*info->prev->name)
|
2009-09-14 13:22:00 +04:00
|
|
|
putchar('/');
|
|
|
|
}
|
2019-07-31 07:38:15 +03:00
|
|
|
printf("%s", info->name);
|
2009-09-14 13:22:00 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void debug_name_entry(int i, struct name_entry *n)
|
|
|
|
{
|
|
|
|
printf("ent#%d %06o %s\n", i,
|
|
|
|
n->path ? n->mode : 0,
|
|
|
|
n->path ? n->path : "(missing)");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void debug_unpack_callback(int n,
|
|
|
|
unsigned long mask,
|
|
|
|
unsigned long dirmask,
|
|
|
|
struct name_entry *names,
|
|
|
|
struct traverse_info *info)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
printf("* unpack mask %lu, dirmask %lu, cnt %d ",
|
|
|
|
mask, dirmask, n);
|
|
|
|
debug_path(info);
|
|
|
|
putchar('\n');
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
debug_name_entry(i, names + i);
|
|
|
|
}
|
|
|
|
|
2018-08-18 17:41:24 +03:00
|
|
|
/*
|
|
|
|
* Note that traverse_by_cache_tree() duplicates some logic in this function
|
|
|
|
* without actually calling it. If you change the logic here you may need to
|
|
|
|
* check and change there as well.
|
|
|
|
*/
|
2008-03-06 07:15:44 +03:00
|
|
|
static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
|
|
|
|
{
|
2009-01-31 17:39:10 +03:00
|
|
|
struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
|
2008-03-06 07:15:44 +03:00
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
const struct name_entry *p = names;
|
|
|
|
|
|
|
|
/* Find first entry with a real name (we could use "mask" too) */
|
|
|
|
while (!p->mode)
|
|
|
|
p++;
|
|
|
|
|
2009-09-14 13:22:00 +04:00
|
|
|
if (o->debug_unpack)
|
|
|
|
debug_unpack_callback(n, mask, dirmask, names, info);
|
|
|
|
|
2008-03-06 07:15:44 +03:00
|
|
|
/* Are we supposed to look at the index too? */
|
|
|
|
if (o->merge) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
while (1) {
|
|
|
|
int cmp;
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 11:03:39 +04:00
|
|
|
struct cache_entry *ce;
|
|
|
|
|
|
|
|
if (o->diff_index_cached)
|
|
|
|
ce = next_cache_entry(o);
|
|
|
|
else
|
|
|
|
ce = find_cache_entry(info, p);
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
if (!ce)
|
|
|
|
break;
|
|
|
|
cmp = compare_entry(ce, info, p);
|
2008-03-06 07:15:44 +03:00
|
|
|
if (cmp < 0) {
|
|
|
|
if (unpack_index_entry(ce, o) < 0)
|
2009-09-20 03:36:45 +04:00
|
|
|
return unpack_failed(o, NULL);
|
2008-03-06 07:15:44 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!cmp) {
|
|
|
|
if (ce_stage(ce)) {
|
|
|
|
/*
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
* If we skip unmerged index
|
|
|
|
* entries, we'll skip this
|
|
|
|
* entry *and* the tree
|
|
|
|
* entries associated with it!
|
2008-03-06 07:15:44 +03:00
|
|
|
*/
|
2008-03-07 05:12:28 +03:00
|
|
|
if (o->skip_unmerged) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
add_same_unmerged(ce, o);
|
2008-03-06 07:15:44 +03:00
|
|
|
return mask;
|
2008-03-07 05:12:28 +03:00
|
|
|
}
|
2008-03-06 07:15:44 +03:00
|
|
|
}
|
|
|
|
src[0] = ce;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-03-07 05:12:28 +03:00
|
|
|
if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
|
2008-03-06 07:15:44 +03:00
|
|
|
return -1;
|
|
|
|
|
2012-04-10 22:55:58 +04:00
|
|
|
if (o->merge && src[0]) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
if (ce_stage(src[0]))
|
|
|
|
mark_ce_used_same_name(src[0], o);
|
|
|
|
else
|
|
|
|
mark_ce_used(src[0], o);
|
|
|
|
}
|
|
|
|
|
2008-03-06 07:15:44 +03:00
|
|
|
/* Now handle any directories.. */
|
|
|
|
if (dirmask) {
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-21 02:57:22 +04:00
|
|
|
/* special case: "diff-index --cached" looking at a tree */
|
|
|
|
if (o->diff_index_cached &&
|
|
|
|
n == 1 && dirmask == 1 && S_ISDIR(names->mode)) {
|
|
|
|
int matches;
|
|
|
|
matches = cache_tree_matches_traversal(o->src_index->cache_tree,
|
|
|
|
names, info);
|
|
|
|
/*
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
* Everything under the name matches; skip the
|
|
|
|
* entire hierarchy. diff_index_cached codepath
|
|
|
|
* special cases D/F conflicts in such a way that
|
|
|
|
* it does not do any look-ahead, so this is safe.
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-21 02:57:22 +04:00
|
|
|
*/
|
|
|
|
if (matches) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
o->cache_bottom += matches;
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-21 02:57:22 +04:00
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-16 03:44:43 +04:00
|
|
|
if (traverse_trees_recursive(n, dirmask, mask & ~dirmask,
|
2008-03-10 11:26:23 +03:00
|
|
|
names, info) < 0)
|
|
|
|
return -1;
|
2008-03-06 07:15:44 +03:00
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
2018-08-13 19:14:28 +03:00
|
|
|
static int clear_ce_flags_1(struct index_state *istate,
|
|
|
|
struct cache_entry **cache, int nr,
|
2013-12-14 15:31:16 +04:00
|
|
|
struct strbuf *prefix,
|
2011-05-09 19:43:01 +04:00
|
|
|
int select_mask, int clear_mask,
|
2019-09-03 21:04:58 +03:00
|
|
|
struct pattern_list *pl,
|
2019-11-22 01:04:44 +03:00
|
|
|
enum pattern_match_result default_match,
|
|
|
|
int progress_nr);
|
2011-05-09 19:43:01 +04:00
|
|
|
|
2010-11-26 21:17:46 +03:00
|
|
|
/* Whole directory matching */
|
2018-08-13 19:14:28 +03:00
|
|
|
static int clear_ce_flags_dir(struct index_state *istate,
|
|
|
|
struct cache_entry **cache, int nr,
|
2013-12-14 15:31:16 +04:00
|
|
|
struct strbuf *prefix,
|
2010-11-26 21:17:46 +03:00
|
|
|
char *basename,
|
|
|
|
int select_mask, int clear_mask,
|
2019-09-03 21:04:58 +03:00
|
|
|
struct pattern_list *pl,
|
2019-11-22 01:04:44 +03:00
|
|
|
enum pattern_match_result default_match,
|
|
|
|
int progress_nr)
|
2010-11-26 21:17:46 +03:00
|
|
|
{
|
2011-05-09 19:43:01 +04:00
|
|
|
struct cache_entry **cache_end;
|
2010-11-26 21:17:46 +03:00
|
|
|
int dtype = DT_DIR;
|
2013-12-14 15:31:16 +04:00
|
|
|
int rc;
|
2019-11-22 01:04:43 +03:00
|
|
|
enum pattern_match_result ret, orig_ret;
|
|
|
|
orig_ret = path_matches_pattern_list(prefix->buf, prefix->len,
|
|
|
|
basename, &dtype, pl, istate);
|
2010-11-26 21:17:46 +03:00
|
|
|
|
2013-12-14 15:31:16 +04:00
|
|
|
strbuf_addch(prefix, '/');
|
2010-11-26 21:17:46 +03:00
|
|
|
|
2011-05-09 19:43:01 +04:00
|
|
|
/* If undecided, use matching result of parent dir in defval */
|
2019-11-22 01:04:43 +03:00
|
|
|
if (orig_ret == UNDECIDED)
|
2019-09-03 21:04:58 +03:00
|
|
|
ret = default_match;
|
2019-11-22 01:04:43 +03:00
|
|
|
else
|
|
|
|
ret = orig_ret;
|
2010-11-26 21:17:46 +03:00
|
|
|
|
2011-05-09 19:43:01 +04:00
|
|
|
for (cache_end = cache; cache_end != cache + nr; cache_end++) {
|
|
|
|
struct cache_entry *ce = *cache_end;
|
2013-12-14 15:31:16 +04:00
|
|
|
if (strncmp(ce->name, prefix->buf, prefix->len))
|
2011-05-09 19:43:01 +04:00
|
|
|
break;
|
2010-11-26 21:17:46 +03:00
|
|
|
}
|
|
|
|
|
2019-11-22 01:04:43 +03:00
|
|
|
if (pl->use_cone_patterns && orig_ret == MATCHED_RECURSIVE) {
|
|
|
|
struct cache_entry **ce = cache;
|
unpack-trees: correctly compute result count
The clear_ce_flags_dir() method processes the cache entries within
a common directory. The returned int is the number of cache entries
processed by that directory. When using the sparse-checkout feature
in cone mode, we can skip the pattern matching for entries in the
directories that are entirely included or entirely excluded.
eb42feca (unpack-trees: hash less in cone mode, 2019-11-21)
introduced this performance feature. The old mechanism relied on
the counts returned by calling clear_ce_flags_1(), but the new
mechanism calculated the number of rows by subtracting "cache_end"
from "cache" to find the size of the range. However, the equation
is wrong because it divides by sizeof(struct cache_entry *). This
is not how pointer arithmetic works!
A coverity build of Git for Windows in preparation for the 2.25.0
release found this issue with the warning, "Pointer differences,
such as cache_end - cache, are automatically scaled down by the
size (8 bytes) of the pointed-to type (struct cache_entry *).
Most likely, the division by sizeof(struct cache_entry *) is
extraneous and should be eliminated." This warning is correct.
This leaves us with the question "how did this even work?" The
problem that occurs with this incorrect pointer arithmetic is
a performance-only bug, and a very slight one at that. Since
the entry count returned by clear_ce_flags_dir() is reduced by
a factor of 8, the loop in clear_ce_flags_1() will re-process
entries from those directories.
By inserting global counters into unpack-tree.c and tracing
them with trace2_data_intmax() (in a private change, for
testing), I was able to see count how many times the loop inside
clear_ce_flags_1() processed an entry and how many times
clear_ce_flags_dir() was called. Each of these are reduced by at
least a factor of 8 with the current change. A factor larger
than 8 happens when multiple levels of directories are repeated.
Specifically, in the Linux kernel repo, the command
git sparse-checkout set LICENSES
restricts the working directory to only the files at root and
in the LICENSES directory. Here are the measured counts:
clear_ce_flags_1 loop blocks:
Before: 11,520
After: 1,621
clear_ce_flags_dir calls:
Before: 7,048
After: 606
While these are dramatic counts, the time spent in
clear_ce_flags_1() is under one millisecond in each case, so
the improvement is not measurable as an end-to-end time.
Reported-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-10 04:59:30 +03:00
|
|
|
rc = cache_end - cache;
|
2019-11-22 01:04:43 +03:00
|
|
|
|
|
|
|
while (ce < cache_end) {
|
|
|
|
(*ce)->ce_flags &= ~clear_mask;
|
|
|
|
ce++;
|
|
|
|
}
|
|
|
|
} else if (pl->use_cone_patterns && orig_ret == NOT_MATCHED) {
|
unpack-trees: correctly compute result count
The clear_ce_flags_dir() method processes the cache entries within
a common directory. The returned int is the number of cache entries
processed by that directory. When using the sparse-checkout feature
in cone mode, we can skip the pattern matching for entries in the
directories that are entirely included or entirely excluded.
eb42feca (unpack-trees: hash less in cone mode, 2019-11-21)
introduced this performance feature. The old mechanism relied on
the counts returned by calling clear_ce_flags_1(), but the new
mechanism calculated the number of rows by subtracting "cache_end"
from "cache" to find the size of the range. However, the equation
is wrong because it divides by sizeof(struct cache_entry *). This
is not how pointer arithmetic works!
A coverity build of Git for Windows in preparation for the 2.25.0
release found this issue with the warning, "Pointer differences,
such as cache_end - cache, are automatically scaled down by the
size (8 bytes) of the pointed-to type (struct cache_entry *).
Most likely, the division by sizeof(struct cache_entry *) is
extraneous and should be eliminated." This warning is correct.
This leaves us with the question "how did this even work?" The
problem that occurs with this incorrect pointer arithmetic is
a performance-only bug, and a very slight one at that. Since
the entry count returned by clear_ce_flags_dir() is reduced by
a factor of 8, the loop in clear_ce_flags_1() will re-process
entries from those directories.
By inserting global counters into unpack-tree.c and tracing
them with trace2_data_intmax() (in a private change, for
testing), I was able to see count how many times the loop inside
clear_ce_flags_1() processed an entry and how many times
clear_ce_flags_dir() was called. Each of these are reduced by at
least a factor of 8 with the current change. A factor larger
than 8 happens when multiple levels of directories are repeated.
Specifically, in the Linux kernel repo, the command
git sparse-checkout set LICENSES
restricts the working directory to only the files at root and
in the LICENSES directory. Here are the measured counts:
clear_ce_flags_1 loop blocks:
Before: 11,520
After: 1,621
clear_ce_flags_dir calls:
Before: 7,048
After: 606
While these are dramatic counts, the time spent in
clear_ce_flags_1() is under one millisecond in each case, so
the improvement is not measurable as an end-to-end time.
Reported-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-10 04:59:30 +03:00
|
|
|
rc = cache_end - cache;
|
2019-11-22 01:04:43 +03:00
|
|
|
} else {
|
|
|
|
rc = clear_ce_flags_1(istate, cache, cache_end - cache,
|
|
|
|
prefix,
|
|
|
|
select_mask, clear_mask,
|
2019-11-22 01:04:44 +03:00
|
|
|
pl, ret,
|
|
|
|
progress_nr);
|
2019-11-22 01:04:43 +03:00
|
|
|
}
|
|
|
|
|
2013-12-14 15:31:16 +04:00
|
|
|
strbuf_setlen(prefix, prefix->len - 1);
|
|
|
|
return rc;
|
2010-11-26 21:17:46 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Traverse the index, find every entry that matches according to
|
2019-09-03 21:04:56 +03:00
|
|
|
* o->pl. Do "ce_flags &= ~clear_mask" on those entries. Return the
|
2010-11-26 21:17:46 +03:00
|
|
|
* number of traversed entries.
|
|
|
|
*
|
|
|
|
* If select_mask is non-zero, only entries whose ce_flags has on of
|
|
|
|
* those bits enabled are traversed.
|
|
|
|
*
|
|
|
|
* cache : pointer to an index entry
|
|
|
|
* prefix_len : an offset to its path
|
|
|
|
*
|
|
|
|
* The current path ("prefix") including the trailing '/' is
|
|
|
|
* cache[0]->name[0..(prefix_len-1)]
|
|
|
|
* Top level path has prefix_len zero.
|
|
|
|
*/
|
2018-08-13 19:14:28 +03:00
|
|
|
static int clear_ce_flags_1(struct index_state *istate,
|
|
|
|
struct cache_entry **cache, int nr,
|
2013-12-14 15:31:16 +04:00
|
|
|
struct strbuf *prefix,
|
2010-11-26 21:17:46 +03:00
|
|
|
int select_mask, int clear_mask,
|
2019-09-03 21:04:58 +03:00
|
|
|
struct pattern_list *pl,
|
2019-11-22 01:04:44 +03:00
|
|
|
enum pattern_match_result default_match,
|
|
|
|
int progress_nr)
|
2010-11-26 21:17:46 +03:00
|
|
|
{
|
avoid computing zero offsets from NULL pointer
The Undefined Behavior Sanitizer in clang-11 seems to have learned a new
trick: it complains about computing offsets from a NULL pointer, even if
that offset is 0. This causes numerous test failures. For example, from
t1090:
unpack-trees.c:1355:41: runtime error: applying zero offset to null pointer
...
not ok 6 - in partial clone, sparse checkout only fetches needed blobs
The code in question looks like this:
struct cache_entry **cache_end = cache + nr;
...
while (cache != cache_end)
and we sometimes pass in a NULL and 0 for "cache" and "nr". This is
conceptually fine, as "cache_end" would be equal to "cache" in this
case, and we wouldn't enter the loop at all. But computing even a zero
offset violates the C standard. And given the fact that UBSan is
noticing this behavior, this might be a potential problem spot if the
compiler starts making unexpected assumptions based on undefined
behavior.
So let's just avoid it, which is pretty easy. In some cases we can just
switch to iterating with a numeric index (as we do in sequencer.c here).
In other cases (like the cache_end one) the use of an end pointer is
more natural; we can keep that by just explicitly checking for the
NULL/0 case when assigning the end pointer.
Note that there are two ways you can write this latter case, checking
for the pointer:
cache_end = cache ? cache + nr : cache;
or the size:
cache_end = nr ? cache + nr : cache;
For the case of a NULL/0 ptr/len combo, they are equivalent. But writing
it the second way (as this patch does) has the property that if somebody
were to incorrectly pass a NULL pointer with a non-zero length, we'd
continue to notice and segfault, rather than silently pretending the
length was zero.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-29 08:46:47 +03:00
|
|
|
struct cache_entry **cache_end = nr ? cache + nr : cache;
|
2010-11-26 21:17:46 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Process all entries that have the given prefix and meet
|
|
|
|
* select_mask condition
|
|
|
|
*/
|
|
|
|
while(cache != cache_end) {
|
|
|
|
struct cache_entry *ce = *cache;
|
|
|
|
const char *name, *slash;
|
2019-09-03 21:04:58 +03:00
|
|
|
int len, dtype;
|
|
|
|
enum pattern_match_result ret;
|
2010-11-26 21:17:46 +03:00
|
|
|
|
2019-11-22 01:04:44 +03:00
|
|
|
display_progress(istate->progress, progress_nr);
|
|
|
|
|
2010-11-26 21:17:46 +03:00
|
|
|
if (select_mask && !(ce->ce_flags & select_mask)) {
|
|
|
|
cache++;
|
2019-11-22 01:04:44 +03:00
|
|
|
progress_nr++;
|
2010-11-26 21:17:46 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2013-12-14 15:31:16 +04:00
|
|
|
if (prefix->len && strncmp(ce->name, prefix->buf, prefix->len))
|
2010-11-26 21:17:46 +03:00
|
|
|
break;
|
|
|
|
|
2013-12-14 15:31:16 +04:00
|
|
|
name = ce->name + prefix->len;
|
2010-11-26 21:17:46 +03:00
|
|
|
slash = strchr(name, '/');
|
|
|
|
|
|
|
|
/* If it's a directory, try whole directory match first */
|
|
|
|
if (slash) {
|
|
|
|
int processed;
|
|
|
|
|
|
|
|
len = slash - name;
|
2013-12-14 15:31:16 +04:00
|
|
|
strbuf_add(prefix, name, len);
|
2010-11-26 21:17:46 +03:00
|
|
|
|
2018-08-13 19:14:28 +03:00
|
|
|
processed = clear_ce_flags_dir(istate, cache, cache_end - cache,
|
2013-12-14 15:31:16 +04:00
|
|
|
prefix,
|
|
|
|
prefix->buf + prefix->len - len,
|
2010-11-26 21:17:46 +03:00
|
|
|
select_mask, clear_mask,
|
2019-11-22 01:04:44 +03:00
|
|
|
pl, default_match,
|
|
|
|
progress_nr);
|
2010-11-26 21:17:46 +03:00
|
|
|
|
|
|
|
/* clear_c_f_dir eats a whole dir already? */
|
|
|
|
if (processed) {
|
|
|
|
cache += processed;
|
2019-11-22 01:04:44 +03:00
|
|
|
progress_nr += processed;
|
2013-12-14 15:31:16 +04:00
|
|
|
strbuf_setlen(prefix, prefix->len - len);
|
2010-11-26 21:17:46 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2013-12-14 15:31:16 +04:00
|
|
|
strbuf_addch(prefix, '/');
|
2019-11-22 01:04:44 +03:00
|
|
|
processed = clear_ce_flags_1(istate, cache, cache_end - cache,
|
|
|
|
prefix,
|
|
|
|
select_mask, clear_mask, pl,
|
|
|
|
default_match, progress_nr);
|
|
|
|
|
|
|
|
cache += processed;
|
|
|
|
progress_nr += processed;
|
|
|
|
|
2013-12-14 15:31:16 +04:00
|
|
|
strbuf_setlen(prefix, prefix->len - len - 1);
|
2010-11-26 21:17:46 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Non-directory */
|
|
|
|
dtype = ce_to_dtype(ce);
|
2019-09-03 21:04:58 +03:00
|
|
|
ret = path_matches_pattern_list(ce->name,
|
|
|
|
ce_namelen(ce),
|
|
|
|
name, &dtype, pl, istate);
|
|
|
|
if (ret == UNDECIDED)
|
|
|
|
ret = default_match;
|
2020-01-31 23:16:15 +03:00
|
|
|
if (ret == MATCHED || ret == MATCHED_RECURSIVE)
|
2010-11-26 21:17:46 +03:00
|
|
|
ce->ce_flags &= ~clear_mask;
|
|
|
|
cache++;
|
2019-11-22 01:04:44 +03:00
|
|
|
progress_nr++;
|
2010-11-26 21:17:46 +03:00
|
|
|
}
|
2019-11-22 01:04:44 +03:00
|
|
|
|
|
|
|
display_progress(istate->progress, progress_nr);
|
2010-11-26 21:17:46 +03:00
|
|
|
return nr - (cache_end - cache);
|
|
|
|
}
|
|
|
|
|
2018-08-13 19:14:28 +03:00
|
|
|
static int clear_ce_flags(struct index_state *istate,
|
|
|
|
int select_mask, int clear_mask,
|
2019-11-22 01:04:44 +03:00
|
|
|
struct pattern_list *pl,
|
|
|
|
int show_progress)
|
2010-11-26 21:17:46 +03:00
|
|
|
{
|
2013-12-14 15:31:16 +04:00
|
|
|
static struct strbuf prefix = STRBUF_INIT;
|
2019-11-22 01:04:39 +03:00
|
|
|
char label[100];
|
|
|
|
int rval;
|
2013-12-14 15:31:16 +04:00
|
|
|
|
|
|
|
strbuf_reset(&prefix);
|
2019-11-22 01:04:44 +03:00
|
|
|
if (show_progress)
|
|
|
|
istate->progress = start_delayed_progress(
|
|
|
|
_("Updating index flags"),
|
|
|
|
istate->cache_nr);
|
2013-12-14 15:31:16 +04:00
|
|
|
|
2019-11-22 01:04:39 +03:00
|
|
|
xsnprintf(label, sizeof(label), "clear_ce_flags(0x%08lx,0x%08lx)",
|
|
|
|
(unsigned long)select_mask, (unsigned long)clear_mask);
|
|
|
|
trace2_region_enter("unpack_trees", label, the_repository);
|
|
|
|
rval = clear_ce_flags_1(istate,
|
2018-08-13 19:14:28 +03:00
|
|
|
istate->cache,
|
|
|
|
istate->cache_nr,
|
2013-12-14 15:31:16 +04:00
|
|
|
&prefix,
|
2010-11-26 21:17:46 +03:00
|
|
|
select_mask, clear_mask,
|
2019-11-22 01:04:44 +03:00
|
|
|
pl, 0, 0);
|
2019-11-22 01:04:39 +03:00
|
|
|
trace2_region_leave("unpack_trees", label, the_repository);
|
|
|
|
|
2019-11-22 01:04:44 +03:00
|
|
|
stop_progress(&istate->progress);
|
2019-11-22 01:04:39 +03:00
|
|
|
return rval;
|
2010-11-26 21:17:46 +03:00
|
|
|
}
|
|
|
|
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
/*
|
|
|
|
* Set/Clear CE_NEW_SKIP_WORKTREE according to $GIT_DIR/info/sparse-checkout
|
|
|
|
*/
|
2019-09-03 21:04:56 +03:00
|
|
|
static void mark_new_skip_worktree(struct pattern_list *pl,
|
2018-08-13 19:14:27 +03:00
|
|
|
struct index_state *istate,
|
2019-11-22 01:04:44 +03:00
|
|
|
int select_flag, int skip_wt_flag,
|
|
|
|
int show_progress)
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2010-11-26 21:17:46 +03:00
|
|
|
/*
|
|
|
|
* 1. Pretend the narrowest worktree: only unmerged entries
|
|
|
|
* are checked out
|
|
|
|
*/
|
2018-08-13 19:14:27 +03:00
|
|
|
for (i = 0; i < istate->cache_nr; i++) {
|
|
|
|
struct cache_entry *ce = istate->cache[i];
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
|
|
|
|
if (select_flag && !(ce->ce_flags & select_flag))
|
|
|
|
continue;
|
|
|
|
|
2018-07-10 22:17:48 +03:00
|
|
|
if (!ce_stage(ce) && !(ce->ce_flags & CE_CONFLICTED))
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
ce->ce_flags |= skip_wt_flag;
|
|
|
|
else
|
|
|
|
ce->ce_flags &= ~skip_wt_flag;
|
|
|
|
}
|
2010-11-26 21:17:46 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* 2. Widen worktree according to sparse-checkout file.
|
|
|
|
* Matched entries will have skip_wt_flag cleared (i.e. "in")
|
|
|
|
*/
|
2019-11-22 01:04:44 +03:00
|
|
|
clear_ce_flags(istate, select_flag, skip_wt_flag, pl, show_progress);
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
}
|
|
|
|
|
2020-03-27 03:48:51 +03:00
|
|
|
static void populate_from_existing_patterns(struct unpack_trees_options *o,
|
|
|
|
struct pattern_list *pl)
|
|
|
|
{
|
2021-01-23 22:58:17 +03:00
|
|
|
if (get_sparse_checkout_patterns(pl) < 0)
|
2020-03-27 03:48:51 +03:00
|
|
|
o->skip_sparse_checkout = 1;
|
|
|
|
else
|
|
|
|
o->pl = pl;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_absent(const struct cache_entry *,
|
|
|
|
enum unpack_trees_error_types,
|
|
|
|
struct unpack_trees_options *);
|
2008-05-29 02:12:30 +04:00
|
|
|
/*
|
|
|
|
* N-way merge "len" trees. Returns 0 on success, -1 on failure to manipulate the
|
|
|
|
* resulting index, -2 on failure to reflect the changes to the work tree.
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
*
|
|
|
|
* CE_ADDED, CE_UNPACKED and CE_NEW_SKIP_WORKTREE are used internally
|
2008-05-29 02:12:30 +04:00
|
|
|
*/
|
2008-03-06 07:15:44 +03:00
|
|
|
int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options *o)
|
|
|
|
{
|
2021-03-30 16:10:52 +03:00
|
|
|
struct repository *repo = the_repository;
|
2009-08-20 17:47:09 +04:00
|
|
|
int i, ret;
|
2006-12-04 13:11:39 +03:00
|
|
|
static struct cache_entry *dfc;
|
2019-09-03 21:04:56 +03:00
|
|
|
struct pattern_list pl;
|
2020-03-27 03:48:47 +03:00
|
|
|
int free_pattern_list = 0;
|
2006-07-30 22:25:18 +04:00
|
|
|
|
2008-03-14 08:07:18 +03:00
|
|
|
if (len > MAX_UNPACK_TREES)
|
|
|
|
die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
|
2006-07-30 22:25:18 +04:00
|
|
|
|
2018-08-18 17:41:23 +03:00
|
|
|
trace_performance_enter();
|
2021-01-04 06:09:11 +03:00
|
|
|
trace2_region_enter("unpack_trees", "unpack_trees", the_repository);
|
|
|
|
|
2021-03-30 16:10:52 +03:00
|
|
|
prepare_repo_settings(repo);
|
|
|
|
if (repo->settings.command_requires_full_index) {
|
|
|
|
ensure_full_index(o->src_index);
|
|
|
|
ensure_full_index(o->dst_index);
|
|
|
|
}
|
|
|
|
|
2009-08-20 17:47:08 +04:00
|
|
|
if (!core_apply_sparse_checkout || !o->update)
|
|
|
|
o->skip_sparse_checkout = 1;
|
2019-11-22 01:04:46 +03:00
|
|
|
if (!o->skip_sparse_checkout && !o->pl) {
|
2020-03-27 03:48:51 +03:00
|
|
|
memset(&pl, 0, sizeof(pl));
|
2020-03-27 03:48:47 +03:00
|
|
|
free_pattern_list = 1;
|
2020-03-27 03:48:51 +03:00
|
|
|
populate_from_existing_patterns(o, &pl);
|
2009-08-20 17:47:08 +04:00
|
|
|
}
|
|
|
|
|
2008-03-07 05:12:28 +03:00
|
|
|
memset(&o->result, 0, sizeof(o->result));
|
unpack_trees(): protect the handcrafted in-core index from read_cache()
unpack_trees() rebuilds the in-core index from scratch by allocating a new
structure and finishing it off by copying the built one to the final
index.
The resulting in-core index is Ok for most use, but read_cache() does not
recognize it as such. The function is meant to be no-op if you already
have loaded the index, until you call discard_cache().
This change the way read_cache() detects an already initialized in-core
index, by introducing an extra bit, and marks the handcrafted in-core
index as initialized, to avoid this problem.
A better fix in the longer term would be to change the read_cache() API so
that it will always discard and re-read from the on-disk index to avoid
confusion. But there are higher level API that have relied on the current
semantics, and they and their users all need to get converted, which is
outside the scope of 'maint' track.
An example of such a higher level API is write_cache_as_tree(), which is
used by git-write-tree as well as later Porcelains like git-merge, revert
and cherry-pick. In the longer term, we should remove read_cache() from
there and add one to cmd_write_tree(); other callers expect that the
in-core index they prepared is what gets written as a tree so no other
change is necessary for this particular codepath.
The original version of this patch marked the index by pointing an
otherwise wasted malloc'ed memory with o->result.alloc, but this version
uses Linus's idea to use a new "initialized" bit, which is conceptually
much cleaner.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-08-23 23:57:30 +04:00
|
|
|
o->result.initialized = 1;
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
o->result.timestamp.sec = o->src_index->timestamp.sec;
|
|
|
|
o->result.timestamp.nsec = o->src_index->timestamp.nsec;
|
2012-04-28 02:58:13 +04:00
|
|
|
o->result.version = o->src_index->version;
|
unpack_trees: fix breakage when o->src_index != o->dst_index
Currently, all callers of unpack_trees() set o->src_index == o->dst_index.
The code in unpack_trees() does not correctly handle them being different.
There are two separate issues:
First, there is the possibility of memory corruption. Since
unpack_trees() creates a temporary index in o->result and then discards
o->dst_index and overwrites it with o->result, in the special case that
o->src_index == o->dst_index, it is safe to just reuse o->src_index's
split_index for o->result. However, when src and dst are different,
reusing o->src_index's split_index for o->result will cause the
split_index to be shared. If either index then has entries replaced or
removed, it will result in the other index referring to free()'d memory.
Second, we can drop the index extensions. Previously, we were moving
index extensions from o->dst_index to o->result. Since o->src_index is
the one that will have the necessary extensions (o->dst_index is likely to
be a new index temporary index created to store the results), we should be
moving the index extensions from there.
Signed-off-by: Elijah Newren <newren@gmail.com>
Acked-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-04-24 09:50:45 +03:00
|
|
|
if (!o->src_index->split_index) {
|
|
|
|
o->result.split_index = NULL;
|
|
|
|
} else if (o->src_index == o->dst_index) {
|
|
|
|
/*
|
|
|
|
* o->dst_index (and thus o->src_index) will be discarded
|
|
|
|
* and overwritten with o->result at the end of this function,
|
|
|
|
* so just use src_index's split_index to avoid having to
|
|
|
|
* create a new one.
|
|
|
|
*/
|
|
|
|
o->result.split_index = o->src_index->split_index;
|
2014-06-13 16:19:36 +04:00
|
|
|
o->result.split_index->refcount++;
|
unpack_trees: fix breakage when o->src_index != o->dst_index
Currently, all callers of unpack_trees() set o->src_index == o->dst_index.
The code in unpack_trees() does not correctly handle them being different.
There are two separate issues:
First, there is the possibility of memory corruption. Since
unpack_trees() creates a temporary index in o->result and then discards
o->dst_index and overwrites it with o->result, in the special case that
o->src_index == o->dst_index, it is safe to just reuse o->src_index's
split_index for o->result. However, when src and dst are different,
reusing o->src_index's split_index for o->result will cause the
split_index to be shared. If either index then has entries replaced or
removed, it will result in the other index referring to free()'d memory.
Second, we can drop the index extensions. Previously, we were moving
index extensions from o->dst_index to o->result. Since o->src_index is
the one that will have the necessary extensions (o->dst_index is likely to
be a new index temporary index created to store the results), we should be
moving the index extensions from there.
Signed-off-by: Elijah Newren <newren@gmail.com>
Acked-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-04-24 09:50:45 +03:00
|
|
|
} else {
|
|
|
|
o->result.split_index = init_split_index(&o->result);
|
|
|
|
}
|
2018-05-02 03:25:44 +03:00
|
|
|
oidcpy(&o->result.oid, &o->src_index->oid);
|
2006-07-30 22:25:18 +04:00
|
|
|
o->merge_size = len;
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
mark_all_ce_unused(o->src_index);
|
2006-12-04 13:11:39 +03:00
|
|
|
|
fsmonitor: fix memory corruption in some corner cases
In 56c6910028a (fsmonitor: change last update timestamp on the
index_state to opaque token, 2020-01-07), we forgot to adjust the part
of `unpack_trees()` that copies the FSMonitor "last-update" information
that we copy from the source index to the result index since 679f2f9fdd2
(unpack-trees: skip stat on fsmonitor-valid files, 2019-11-20).
Since the "last-update" information is no longer a 64-bit number, but a
free-form string that has been allocated, we need to duplicate it rather
than just copying it.
This is important because there _are_ cases when `unpack_trees()` will
perform a oneway merge that implicitly calls `refresh_fsmonitor()`
(which will allocate that "last-update" token). This happens _after_
that token was copied into the result index. However, we _then_ call
`check_updates()` on that index, which will _also_ call
`refresh_fsmonitor()`, accessing the "last-update" string, which by now
would be released already.
In the instance that lead to this patch, this caused a segmentation
fault during a lengthy, complicated rebase involving the todo command
`reset` that (crucially) had to updated many files. Unfortunately, it
seems very hard to trigger that crash, therefore this patch is not
accompanied by a regression test.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-03-17 18:30:48 +03:00
|
|
|
o->result.fsmonitor_last_update =
|
|
|
|
xstrdup_or_null(o->src_index->fsmonitor_last_update);
|
2019-11-20 11:32:17 +03:00
|
|
|
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
/*
|
|
|
|
* Sparse checkout loop #1: set NEW_SKIP_WORKTREE on existing entries
|
|
|
|
*/
|
|
|
|
if (!o->skip_sparse_checkout)
|
2019-11-22 01:04:44 +03:00
|
|
|
mark_new_skip_worktree(o->pl, o->src_index, 0,
|
|
|
|
CE_NEW_SKIP_WORKTREE, o->verbose_update);
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
|
2006-12-04 13:11:39 +03:00
|
|
|
if (!dfc)
|
correct cache_entry allocation
Most cache_entry structs are allocated by using the
cache_entry_size macro, which rounds the size of the struct
up to the nearest multiple of 8 bytes (presumably to avoid
memory fragmentation).
There is one exception: the special "conflict entry" is
allocated with an empty name, and so is explicitly given
just one extra byte to hold the NUL.
However, later code doesn't realize that this particular
struct has been allocated differently, and happily tries
reading and copying it based on the ce_size macro, which
assumes the 8-byte alignment.
This can lead to reading uninitalized data, though since
that data is simply padding, there shouldn't be any problem
as a result. Still, it makes sense to hold the padding
assumption so as not to surprise later maintainers.
This fixes valgrind errors in t1005, t3030, t4002, and
t4114.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-10-23 08:30:58 +04:00
|
|
|
dfc = xcalloc(1, cache_entry_size(0));
|
2006-12-04 13:11:39 +03:00
|
|
|
o->df_conflict_entry = dfc;
|
2006-07-30 22:25:18 +04:00
|
|
|
|
|
|
|
if (len) {
|
2008-03-06 07:15:44 +03:00
|
|
|
const char *prefix = o->prefix ? o->prefix : "";
|
|
|
|
struct traverse_info info;
|
|
|
|
|
|
|
|
setup_traverse_info(&info, prefix);
|
|
|
|
info.fn = unpack_callback;
|
|
|
|
info.data = o;
|
2010-08-11 12:38:07 +04:00
|
|
|
info.show_all_errors = o->show_all_errors;
|
2011-08-29 23:31:06 +04:00
|
|
|
info.pathspec = o->pathspec;
|
2008-03-06 07:15:44 +03:00
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
if (o->prefix) {
|
|
|
|
/*
|
|
|
|
* Unpack existing index entries that sort before the
|
|
|
|
* prefix the tree is spliced into. Note that o->merge
|
|
|
|
* is always true in this case.
|
|
|
|
*/
|
|
|
|
while (1) {
|
|
|
|
struct cache_entry *ce = next_cache_entry(o);
|
|
|
|
if (!ce)
|
|
|
|
break;
|
|
|
|
if (ce_in_traverse_path(ce, &info))
|
|
|
|
break;
|
|
|
|
if (unpack_index_entry(ce, o) < 0)
|
|
|
|
goto return_failed;
|
|
|
|
}
|
2009-08-20 17:47:08 +04:00
|
|
|
}
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
|
2018-08-18 17:41:23 +03:00
|
|
|
trace_performance_enter();
|
2021-01-04 06:09:11 +03:00
|
|
|
trace2_region_enter("unpack_trees", "traverse_trees", the_repository);
|
2018-11-18 19:47:57 +03:00
|
|
|
ret = traverse_trees(o->src_index, len, t, &info);
|
2021-01-04 06:09:11 +03:00
|
|
|
trace2_region_leave("unpack_trees", "traverse_trees", the_repository);
|
2018-08-18 17:41:23 +03:00
|
|
|
trace_performance_leave("traverse_trees");
|
|
|
|
if (ret < 0)
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
goto return_failed;
|
2006-07-30 22:25:18 +04:00
|
|
|
}
|
|
|
|
|
2008-03-06 07:15:44 +03:00
|
|
|
/* Any left-over entries in the index? */
|
|
|
|
if (o->merge) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
while (1) {
|
|
|
|
struct cache_entry *ce = next_cache_entry(o);
|
|
|
|
if (!ce)
|
|
|
|
break;
|
2008-03-06 07:15:44 +03:00
|
|
|
if (unpack_index_entry(ce, o) < 0)
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
goto return_failed;
|
2008-02-07 19:39:52 +03:00
|
|
|
}
|
|
|
|
}
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
mark_all_ce_unused(o->src_index);
|
2006-07-30 22:25:18 +04:00
|
|
|
|
2009-08-20 17:47:08 +04:00
|
|
|
if (o->trivial_merges_only && o->nontrivial_merge) {
|
|
|
|
ret = unpack_failed(o, "Merge requires file-level merging");
|
|
|
|
goto done;
|
|
|
|
}
|
2008-03-06 07:15:44 +03:00
|
|
|
|
2009-08-20 17:47:09 +04:00
|
|
|
if (!o->skip_sparse_checkout) {
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
/*
|
|
|
|
* Sparse checkout loop #2: set NEW_SKIP_WORKTREE on entries not in loop #1
|
2020-03-27 03:48:44 +03:00
|
|
|
* If they will have NEW_SKIP_WORKTREE, also set CE_SKIP_WORKTREE
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
* so apply_sparse_checkout() won't attempt to remove it from worktree
|
|
|
|
*/
|
2019-11-22 01:04:44 +03:00
|
|
|
mark_new_skip_worktree(o->pl, &o->result,
|
|
|
|
CE_ADDED, CE_SKIP_WORKTREE | CE_NEW_SKIP_WORKTREE,
|
|
|
|
o->verbose_update);
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
|
2011-09-22 15:24:22 +04:00
|
|
|
ret = 0;
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
for (i = 0; i < o->result.cache_nr; i++) {
|
2009-08-20 17:47:09 +04:00
|
|
|
struct cache_entry *ce = o->result.cache[i];
|
|
|
|
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
/*
|
|
|
|
* Entries marked with CE_ADDED in merged_entry() do not have
|
|
|
|
* verify_absent() check (the check is effectively disabled
|
|
|
|
* because CE_NEW_SKIP_WORKTREE is set unconditionally).
|
|
|
|
*
|
|
|
|
* Do the real check now because we have had
|
|
|
|
* correct CE_NEW_SKIP_WORKTREE
|
|
|
|
*/
|
|
|
|
if (ce->ce_flags & CE_ADDED &&
|
unpack-trees: failure to set SKIP_WORKTREE bits always just a warning
Setting and clearing of the SKIP_WORKTREE bit is not only done when
users run 'sparse-checkout'; other commands such as 'checkout' also run
through unpack_trees() which has logic for handling this special bit.
As such, we need to consider how they handle special cases. A couple
comparison points should help explain the rationale for changing how
unpack_trees() handles these bits:
Ignoring sparse checkouts for a moment, if you are switching
branches and have dirty changes, it is only considered an error that
will prevent the branch switching from being successful if the dirty
file happens to be one of the paths with different contents.
SKIP_WORKTREE has always been considered advisory; for example, if
rebase or merge need or even want to materialize a path as part of
their work, they have always been allowed to do so regardless of the
SKIP_WORKTREE setting. This has been used for unmerged paths, but
it was often used for paths it wasn't needed just because it made
the code simpler. It was a best-effort consideration, and when it
materialized paths contrary to the SKIP_WORKTREE setting, it was
never required to even print a warning message.
In the past if you trying to run e.g. 'git checkout' and:
1) you had a path that was materialized and had some dirty changes
2) the path was listed in $GITDIR/info/sparse-checkout
3) this path did not different between the current and target branches
then despite the comparison points above, the inability to set
SKIP_WORKTREE was treated as a *hard* error that would abort the
checkout operation. This is completely inconsistent with how
SKIP_WORKTREE is handled elsewhere, and rather annoying for users as
leaving the paths materialized in the working copy (with a simple
warning) should present no problem at all.
Downgrade any errors from inability to toggle the SKIP_WORKTREE bit to a
warning and allow the operations to continue.
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-27 03:49:00 +03:00
|
|
|
verify_absent(ce, WARNING_SPARSE_ORPHANED_NOT_OVERWRITTEN, o))
|
|
|
|
ret = 1;
|
|
|
|
|
|
|
|
if (apply_sparse_checkout(&o->result, ce, o))
|
|
|
|
ret = 1;
|
2009-08-20 17:47:13 +04:00
|
|
|
}
|
unpack-trees: failure to set SKIP_WORKTREE bits always just a warning
Setting and clearing of the SKIP_WORKTREE bit is not only done when
users run 'sparse-checkout'; other commands such as 'checkout' also run
through unpack_trees() which has logic for handling this special bit.
As such, we need to consider how they handle special cases. A couple
comparison points should help explain the rationale for changing how
unpack_trees() handles these bits:
Ignoring sparse checkouts for a moment, if you are switching
branches and have dirty changes, it is only considered an error that
will prevent the branch switching from being successful if the dirty
file happens to be one of the paths with different contents.
SKIP_WORKTREE has always been considered advisory; for example, if
rebase or merge need or even want to materialize a path as part of
their work, they have always been allowed to do so regardless of the
SKIP_WORKTREE setting. This has been used for unmerged paths, but
it was often used for paths it wasn't needed just because it made
the code simpler. It was a best-effort consideration, and when it
materialized paths contrary to the SKIP_WORKTREE setting, it was
never required to even print a warning message.
In the past if you trying to run e.g. 'git checkout' and:
1) you had a path that was materialized and had some dirty changes
2) the path was listed in $GITDIR/info/sparse-checkout
3) this path did not different between the current and target branches
then despite the comparison points above, the inability to set
SKIP_WORKTREE was treated as a *hard* error that would abort the
checkout operation. This is completely inconsistent with how
SKIP_WORKTREE is handled elsewhere, and rather annoying for users as
leaving the paths materialized in the working copy (with a simple
warning) should present no problem at all.
Downgrade any errors from inability to toggle the SKIP_WORKTREE bit to a
warning and allow the operations to continue.
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-27 03:49:00 +03:00
|
|
|
if (ret == 1) {
|
|
|
|
/*
|
|
|
|
* Inability to sparsify or de-sparsify individual
|
|
|
|
* paths is not an error, but just a warning.
|
|
|
|
*/
|
|
|
|
if (o->show_all_errors)
|
|
|
|
display_warning_msgs(o);
|
|
|
|
ret = 0;
|
|
|
|
}
|
2009-08-20 17:47:09 +04:00
|
|
|
}
|
2008-03-06 07:15:44 +03:00
|
|
|
|
2020-03-27 03:48:49 +03:00
|
|
|
ret = check_updates(o, &o->result) ? (-2) : 0;
|
2013-08-13 22:27:58 +04:00
|
|
|
if (o->dst_index) {
|
2018-08-18 17:41:26 +03:00
|
|
|
move_index_extensions(&o->result, o->src_index);
|
2015-07-28 22:30:40 +03:00
|
|
|
if (!ret) {
|
2018-08-18 17:41:28 +03:00
|
|
|
if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0))
|
2018-11-10 08:49:02 +03:00
|
|
|
cache_tree_verify(the_repository, &o->result);
|
2015-07-28 22:30:40 +03:00
|
|
|
if (!cache_tree_fully_valid(o->result.cache_tree))
|
|
|
|
cache_tree_update(&o->result,
|
|
|
|
WRITE_TREE_SILENT |
|
|
|
|
WRITE_TREE_REPAIR);
|
|
|
|
}
|
2019-02-15 20:59:21 +03:00
|
|
|
|
|
|
|
o->result.updated_workdir = 1;
|
2013-08-13 22:27:58 +04:00
|
|
|
discard_index(o->dst_index);
|
2008-03-07 05:12:28 +03:00
|
|
|
*o->dst_index = o->result;
|
2014-11-17 23:12:41 +03:00
|
|
|
} else {
|
|
|
|
discard_index(&o->result);
|
2013-08-13 22:27:58 +04:00
|
|
|
}
|
unpack_trees: fix breakage when o->src_index != o->dst_index
Currently, all callers of unpack_trees() set o->src_index == o->dst_index.
The code in unpack_trees() does not correctly handle them being different.
There are two separate issues:
First, there is the possibility of memory corruption. Since
unpack_trees() creates a temporary index in o->result and then discards
o->dst_index and overwrites it with o->result, in the special case that
o->src_index == o->dst_index, it is safe to just reuse o->src_index's
split_index for o->result. However, when src and dst are different,
reusing o->src_index's split_index for o->result will cause the
split_index to be shared. If either index then has entries replaced or
removed, it will result in the other index referring to free()'d memory.
Second, we can drop the index extensions. Previously, we were moving
index extensions from o->dst_index to o->result. Since o->src_index is
the one that will have the necessary extensions (o->dst_index is likely to
be a new index temporary index created to store the results), we should be
moving the index extensions from there.
Signed-off-by: Elijah Newren <newren@gmail.com>
Acked-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-04-24 09:50:45 +03:00
|
|
|
o->src_index = NULL;
|
2009-08-20 17:47:08 +04:00
|
|
|
|
|
|
|
done:
|
2020-03-27 03:48:47 +03:00
|
|
|
if (free_pattern_list)
|
2019-11-22 01:04:46 +03:00
|
|
|
clear_pattern_list(&pl);
|
2021-01-04 06:09:11 +03:00
|
|
|
trace2_region_leave("unpack_trees", "unpack_trees", the_repository);
|
2020-03-27 03:48:47 +03:00
|
|
|
trace_performance_leave("unpack_trees");
|
2008-05-29 02:12:30 +04:00
|
|
|
return ret;
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
|
|
|
|
return_failed:
|
2010-08-11 12:38:07 +04:00
|
|
|
if (o->show_all_errors)
|
|
|
|
display_error_msgs(o);
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
mark_all_ce_unused(o->src_index);
|
2010-01-25 04:35:58 +03:00
|
|
|
ret = unpack_failed(o, NULL);
|
2011-05-31 21:06:44 +04:00
|
|
|
if (o->exiting_early)
|
|
|
|
ret = 0;
|
2010-01-25 04:35:58 +03:00
|
|
|
goto done;
|
2006-07-30 22:25:18 +04:00
|
|
|
}
|
2006-07-30 22:26:15 +04:00
|
|
|
|
unpack-trees: add a new update_sparsity() function
Previously, the only way to update the SKIP_WORKTREE bits for various
paths was invoking `git read-tree -mu HEAD` or calling the same code
that this codepath invoked. This however had a number of problems if
the index or working directory were not clean. First, let's consider
the case:
Flipping SKIP_WORKTREE -> !SKIP_WORKTREE (materializing files)
If the working tree was clean this was fine, but if there were files or
directories or symlinks or whatever already present at the given path
then the operation would abort with an error. Let's label this case
for later discussion:
A) There is an untracked path in the way
Now let's consider the opposite case:
Flipping !SKIP_WORKTREE -> SKIP_WORKTREE (removing files)
If the index and working tree was clean this was fine, but if there were
any unclean paths we would run into problems. There are three different
cases to consider:
B) The path is unmerged
C) The path has unstaged changes
D) The path has staged changes (differs from HEAD)
If any path fell into case B or C, then the whole operation would be
aborted with an error. With sparse-checkout, the whole operation would
be aborted for case D as well, but for its predecessor of using `git
read-tree -mu HEAD` directly, any paths that fell into case D would be
removed from the working copy and the index entry for that path would be
reset to match HEAD -- which looks and feels like data loss to users
(only a few are even aware to ask whether it can be recovered, and even
then it requires walking through loose objects trying to match up the
right ones).
Refusing to remove files that have unsaved user changes is good, but
refusing to work on any other paths is very problematic for users. If
the user is in the middle of a rebase or has made modifications to files
that bring in more dependencies, then for their build to work they need
to update the sparse paths. This logic has been preventing them from
doing so. Sometimes in response, the user will stage the files and
re-try, to no avail with sparse-checkout or to the horror of losing
their changes if they are using its predecessor of `git read-tree -mu
HEAD`.
Add a new update_sparsity() function which will not error out in any of
these cases but behaves as follows for the special cases:
A) Leave the file in the working copy alone, clear the SKIP_WORKTREE
bit, and print a warning (thus leaving the path in a state where
status will report the file as modified, which seems logical).
B) Do NOT mark this path as SKIP_WORKTREE, and leave it as unmerged.
C) Do NOT mark this path as SKIP_WORKTREE and print a warning about
the dirty path.
D) Mark the path as SKIP_WORKTREE, but do not revert the version
stored in the index to match HEAD; leave the contents alone.
I tried a different behavior for A (leave the SKIP_WORKTREE bit set),
but found it very surprising and counter-intuitive (e.g. the user sees
it is present along with all the other files in that directory, tries to
stage it, but git add ignores it since the SKIP_WORKTREE bit is set). A
& C seem like optimal behavior to me. B may be as well, though I wonder
if printing a warning would be an improvement. Some might be slightly
surprised by D at first, but given that it does the right thing with
`git commit` and even `git commit -a` (`git add` ignores entries that
are marked SKIP_WORKTREE and thus doesn't delete them, and `commit -a`
is similar), it seems logical to me.
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-27 03:48:52 +03:00
|
|
|
/*
|
|
|
|
* Update SKIP_WORKTREE bits according to sparsity patterns, and update
|
|
|
|
* working directory to match.
|
|
|
|
*
|
|
|
|
* CE_NEW_SKIP_WORKTREE is used internally.
|
|
|
|
*/
|
|
|
|
enum update_sparsity_result update_sparsity(struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
enum update_sparsity_result ret = UPDATE_SPARSITY_SUCCESS;
|
|
|
|
struct pattern_list pl;
|
2020-05-04 21:27:43 +03:00
|
|
|
int i;
|
unpack-trees: add a new update_sparsity() function
Previously, the only way to update the SKIP_WORKTREE bits for various
paths was invoking `git read-tree -mu HEAD` or calling the same code
that this codepath invoked. This however had a number of problems if
the index or working directory were not clean. First, let's consider
the case:
Flipping SKIP_WORKTREE -> !SKIP_WORKTREE (materializing files)
If the working tree was clean this was fine, but if there were files or
directories or symlinks or whatever already present at the given path
then the operation would abort with an error. Let's label this case
for later discussion:
A) There is an untracked path in the way
Now let's consider the opposite case:
Flipping !SKIP_WORKTREE -> SKIP_WORKTREE (removing files)
If the index and working tree was clean this was fine, but if there were
any unclean paths we would run into problems. There are three different
cases to consider:
B) The path is unmerged
C) The path has unstaged changes
D) The path has staged changes (differs from HEAD)
If any path fell into case B or C, then the whole operation would be
aborted with an error. With sparse-checkout, the whole operation would
be aborted for case D as well, but for its predecessor of using `git
read-tree -mu HEAD` directly, any paths that fell into case D would be
removed from the working copy and the index entry for that path would be
reset to match HEAD -- which looks and feels like data loss to users
(only a few are even aware to ask whether it can be recovered, and even
then it requires walking through loose objects trying to match up the
right ones).
Refusing to remove files that have unsaved user changes is good, but
refusing to work on any other paths is very problematic for users. If
the user is in the middle of a rebase or has made modifications to files
that bring in more dependencies, then for their build to work they need
to update the sparse paths. This logic has been preventing them from
doing so. Sometimes in response, the user will stage the files and
re-try, to no avail with sparse-checkout or to the horror of losing
their changes if they are using its predecessor of `git read-tree -mu
HEAD`.
Add a new update_sparsity() function which will not error out in any of
these cases but behaves as follows for the special cases:
A) Leave the file in the working copy alone, clear the SKIP_WORKTREE
bit, and print a warning (thus leaving the path in a state where
status will report the file as modified, which seems logical).
B) Do NOT mark this path as SKIP_WORKTREE, and leave it as unmerged.
C) Do NOT mark this path as SKIP_WORKTREE and print a warning about
the dirty path.
D) Mark the path as SKIP_WORKTREE, but do not revert the version
stored in the index to match HEAD; leave the contents alone.
I tried a different behavior for A (leave the SKIP_WORKTREE bit set),
but found it very surprising and counter-intuitive (e.g. the user sees
it is present along with all the other files in that directory, tries to
stage it, but git add ignores it since the SKIP_WORKTREE bit is set). A
& C seem like optimal behavior to me. B may be as well, though I wonder
if printing a warning would be an improvement. Some might be slightly
surprised by D at first, but given that it does the right thing with
`git commit` and even `git commit -a` (`git add` ignores entries that
are marked SKIP_WORKTREE and thus doesn't delete them, and `commit -a`
is similar), it seems logical to me.
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-27 03:48:52 +03:00
|
|
|
unsigned old_show_all_errors;
|
|
|
|
int free_pattern_list = 0;
|
|
|
|
|
|
|
|
old_show_all_errors = o->show_all_errors;
|
|
|
|
o->show_all_errors = 1;
|
|
|
|
|
|
|
|
/* Sanity checks */
|
|
|
|
if (!o->update || o->index_only || o->skip_sparse_checkout)
|
|
|
|
BUG("update_sparsity() is for reflecting sparsity patterns in working directory");
|
|
|
|
if (o->src_index != o->dst_index || o->fn)
|
|
|
|
BUG("update_sparsity() called wrong");
|
|
|
|
|
|
|
|
trace_performance_enter();
|
|
|
|
|
|
|
|
/* If we weren't given patterns, use the recorded ones */
|
|
|
|
if (!o->pl) {
|
|
|
|
memset(&pl, 0, sizeof(pl));
|
|
|
|
free_pattern_list = 1;
|
|
|
|
populate_from_existing_patterns(o, &pl);
|
|
|
|
if (o->skip_sparse_checkout)
|
|
|
|
goto skip_sparse_checkout;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Set NEW_SKIP_WORKTREE on existing entries. */
|
|
|
|
mark_all_ce_unused(o->src_index);
|
|
|
|
mark_new_skip_worktree(o->pl, o->src_index, 0,
|
|
|
|
CE_NEW_SKIP_WORKTREE, o->verbose_update);
|
|
|
|
|
|
|
|
/* Then loop over entries and update/remove as needed */
|
|
|
|
ret = UPDATE_SPARSITY_SUCCESS;
|
|
|
|
for (i = 0; i < o->src_index->cache_nr; i++) {
|
|
|
|
struct cache_entry *ce = o->src_index->cache[i];
|
|
|
|
|
2020-03-27 03:48:59 +03:00
|
|
|
|
|
|
|
if (ce_stage(ce)) {
|
|
|
|
/* -1 because for loop will increment by 1 */
|
|
|
|
i += warn_conflicted_path(o->src_index, i, o) - 1;
|
|
|
|
ret = UPDATE_SPARSITY_WARNINGS;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
unpack-trees: add a new update_sparsity() function
Previously, the only way to update the SKIP_WORKTREE bits for various
paths was invoking `git read-tree -mu HEAD` or calling the same code
that this codepath invoked. This however had a number of problems if
the index or working directory were not clean. First, let's consider
the case:
Flipping SKIP_WORKTREE -> !SKIP_WORKTREE (materializing files)
If the working tree was clean this was fine, but if there were files or
directories or symlinks or whatever already present at the given path
then the operation would abort with an error. Let's label this case
for later discussion:
A) There is an untracked path in the way
Now let's consider the opposite case:
Flipping !SKIP_WORKTREE -> SKIP_WORKTREE (removing files)
If the index and working tree was clean this was fine, but if there were
any unclean paths we would run into problems. There are three different
cases to consider:
B) The path is unmerged
C) The path has unstaged changes
D) The path has staged changes (differs from HEAD)
If any path fell into case B or C, then the whole operation would be
aborted with an error. With sparse-checkout, the whole operation would
be aborted for case D as well, but for its predecessor of using `git
read-tree -mu HEAD` directly, any paths that fell into case D would be
removed from the working copy and the index entry for that path would be
reset to match HEAD -- which looks and feels like data loss to users
(only a few are even aware to ask whether it can be recovered, and even
then it requires walking through loose objects trying to match up the
right ones).
Refusing to remove files that have unsaved user changes is good, but
refusing to work on any other paths is very problematic for users. If
the user is in the middle of a rebase or has made modifications to files
that bring in more dependencies, then for their build to work they need
to update the sparse paths. This logic has been preventing them from
doing so. Sometimes in response, the user will stage the files and
re-try, to no avail with sparse-checkout or to the horror of losing
their changes if they are using its predecessor of `git read-tree -mu
HEAD`.
Add a new update_sparsity() function which will not error out in any of
these cases but behaves as follows for the special cases:
A) Leave the file in the working copy alone, clear the SKIP_WORKTREE
bit, and print a warning (thus leaving the path in a state where
status will report the file as modified, which seems logical).
B) Do NOT mark this path as SKIP_WORKTREE, and leave it as unmerged.
C) Do NOT mark this path as SKIP_WORKTREE and print a warning about
the dirty path.
D) Mark the path as SKIP_WORKTREE, but do not revert the version
stored in the index to match HEAD; leave the contents alone.
I tried a different behavior for A (leave the SKIP_WORKTREE bit set),
but found it very surprising and counter-intuitive (e.g. the user sees
it is present along with all the other files in that directory, tries to
stage it, but git add ignores it since the SKIP_WORKTREE bit is set). A
& C seem like optimal behavior to me. B may be as well, though I wonder
if printing a warning would be an improvement. Some might be slightly
surprised by D at first, but given that it does the right thing with
`git commit` and even `git commit -a` (`git add` ignores entries that
are marked SKIP_WORKTREE and thus doesn't delete them, and `commit -a`
is similar), it seems logical to me.
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-27 03:48:52 +03:00
|
|
|
if (apply_sparse_checkout(o->src_index, ce, o))
|
|
|
|
ret = UPDATE_SPARSITY_WARNINGS;
|
|
|
|
}
|
|
|
|
|
|
|
|
skip_sparse_checkout:
|
|
|
|
if (check_updates(o, o->src_index))
|
|
|
|
ret = UPDATE_SPARSITY_WORKTREE_UPDATE_FAILURES;
|
|
|
|
|
2020-03-27 03:48:57 +03:00
|
|
|
display_warning_msgs(o);
|
unpack-trees: add a new update_sparsity() function
Previously, the only way to update the SKIP_WORKTREE bits for various
paths was invoking `git read-tree -mu HEAD` or calling the same code
that this codepath invoked. This however had a number of problems if
the index or working directory were not clean. First, let's consider
the case:
Flipping SKIP_WORKTREE -> !SKIP_WORKTREE (materializing files)
If the working tree was clean this was fine, but if there were files or
directories or symlinks or whatever already present at the given path
then the operation would abort with an error. Let's label this case
for later discussion:
A) There is an untracked path in the way
Now let's consider the opposite case:
Flipping !SKIP_WORKTREE -> SKIP_WORKTREE (removing files)
If the index and working tree was clean this was fine, but if there were
any unclean paths we would run into problems. There are three different
cases to consider:
B) The path is unmerged
C) The path has unstaged changes
D) The path has staged changes (differs from HEAD)
If any path fell into case B or C, then the whole operation would be
aborted with an error. With sparse-checkout, the whole operation would
be aborted for case D as well, but for its predecessor of using `git
read-tree -mu HEAD` directly, any paths that fell into case D would be
removed from the working copy and the index entry for that path would be
reset to match HEAD -- which looks and feels like data loss to users
(only a few are even aware to ask whether it can be recovered, and even
then it requires walking through loose objects trying to match up the
right ones).
Refusing to remove files that have unsaved user changes is good, but
refusing to work on any other paths is very problematic for users. If
the user is in the middle of a rebase or has made modifications to files
that bring in more dependencies, then for their build to work they need
to update the sparse paths. This logic has been preventing them from
doing so. Sometimes in response, the user will stage the files and
re-try, to no avail with sparse-checkout or to the horror of losing
their changes if they are using its predecessor of `git read-tree -mu
HEAD`.
Add a new update_sparsity() function which will not error out in any of
these cases but behaves as follows for the special cases:
A) Leave the file in the working copy alone, clear the SKIP_WORKTREE
bit, and print a warning (thus leaving the path in a state where
status will report the file as modified, which seems logical).
B) Do NOT mark this path as SKIP_WORKTREE, and leave it as unmerged.
C) Do NOT mark this path as SKIP_WORKTREE and print a warning about
the dirty path.
D) Mark the path as SKIP_WORKTREE, but do not revert the version
stored in the index to match HEAD; leave the contents alone.
I tried a different behavior for A (leave the SKIP_WORKTREE bit set),
but found it very surprising and counter-intuitive (e.g. the user sees
it is present along with all the other files in that directory, tries to
stage it, but git add ignores it since the SKIP_WORKTREE bit is set). A
& C seem like optimal behavior to me. B may be as well, though I wonder
if printing a warning would be an improvement. Some might be slightly
surprised by D at first, but given that it does the right thing with
`git commit` and even `git commit -a` (`git add` ignores entries that
are marked SKIP_WORKTREE and thus doesn't delete them, and `commit -a`
is similar), it seems logical to me.
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-27 03:48:52 +03:00
|
|
|
o->show_all_errors = old_show_all_errors;
|
|
|
|
if (free_pattern_list)
|
|
|
|
clear_pattern_list(&pl);
|
|
|
|
trace_performance_leave("update_sparsity");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2006-07-30 22:26:15 +04:00
|
|
|
/* Here come the merge functions */
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int reject_merge(const struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2019-03-22 12:31:35 +03:00
|
|
|
return add_rejected_path(o, ERROR_WOULD_OVERWRITE, ce->name);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int same(const struct cache_entry *a, const struct cache_entry *b)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
|
|
|
if (!!a != !!b)
|
|
|
|
return 0;
|
|
|
|
if (!a && !b)
|
|
|
|
return 1;
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 10:04:04 +03:00
|
|
|
if ((a->ce_flags | b->ce_flags) & CE_CONFLICTED)
|
|
|
|
return 0;
|
2006-07-30 22:26:15 +04:00
|
|
|
return a->ce_mode == b->ce_mode &&
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-29 00:22:40 +03:00
|
|
|
oideq(&a->oid, &b->oid);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When a CE gets turned into an unmerged entry, we
|
|
|
|
* want it to be up-to-date
|
|
|
|
*/
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_uptodate_1(const struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o,
|
|
|
|
enum unpack_trees_error_types error_type)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
|
2011-07-30 07:55:05 +04:00
|
|
|
if (o->index_only)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* CE_VALID and CE_SKIP_WORKTREE cheat, we better check again
|
|
|
|
* if this entry is truly up-to-date because this file may be
|
|
|
|
* overwritten.
|
|
|
|
*/
|
|
|
|
if ((ce->ce_flags & CE_VALID) || ce_skip_worktree(ce))
|
|
|
|
; /* keep checking */
|
|
|
|
else if (o->reset || ce_uptodate(ce))
|
2008-02-07 19:39:48 +03:00
|
|
|
return 0;
|
2006-07-30 22:26:15 +04:00
|
|
|
|
|
|
|
if (!lstat(ce->name, &st)) {
|
2011-07-30 07:55:05 +04:00
|
|
|
int flags = CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE;
|
|
|
|
unsigned changed = ie_match_stat(o->src_index, ce, &st, flags);
|
2017-03-15 00:46:39 +03:00
|
|
|
|
|
|
|
if (submodule_from_ce(ce)) {
|
|
|
|
int r = check_submodule_move_head(ce,
|
|
|
|
"HEAD", oid_to_hex(&ce->oid), o);
|
|
|
|
if (r)
|
2019-03-22 12:31:35 +03:00
|
|
|
return add_rejected_path(o, error_type, ce->name);
|
2017-03-15 00:46:39 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-07-30 22:26:15 +04:00
|
|
|
if (!changed)
|
2008-02-07 19:39:48 +03:00
|
|
|
return 0;
|
unpack-trees.c: assume submodules are clean during check-out
Sven originally raised this issue:
If you have a submodule checked out and you go back (or
forward) to a revision of the supermodule that contains a
different revision of the submodule and then switch to
another revision, it will complain that the submodule is not
uptodate, because git simply didn't update the submodule in
the first move.
The current policy is to consider it is perfectly normal that
checked-out submodule is out-of-sync wrt the supermodule index.
At least until we introduce a superproject repository
configuration option that says "in this repository, I do care
about this submodule and at any time I move around in the
superproject, recursively check out the submodule to match", it
is a reasonable policy, as we currently do not recursively
checkout the submodules at all. The most extreme case of this
policy is that the superproject index knows about the submodule
but the subdirectory does not even have to be checked out.
The function verify_uptodate(), called during the two-way merge
aka branch switching, is about "make sure the filesystem entity
that corresponds to this cache entry is up to date, lest we lose
the local modifications". As we explicitly allow submodule
checkout to drift from the supermodule index entry, the check
should say "Ok, for submodules, not matching is the norm" for
now.
Later when we have the ability to mark "I care about this
submodule to be always in sync with the superproject" (thereby
implementing automatic recursive checkout and perhaps diff,
among other things), we should check if the submodule in
question is marked as such and perform the current test.
Acked-by: Lars Hjemli <hjemli@gmail.com>
Acked-by: Sven Verdoolaege <skimo@kotnet.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-04 09:13:09 +04:00
|
|
|
/*
|
2017-03-15 00:46:39 +03:00
|
|
|
* Historic default policy was to allow submodule to be out
|
|
|
|
* of sync wrt the superproject index. If the submodule was
|
|
|
|
* not considered interesting above, we don't care here.
|
unpack-trees.c: assume submodules are clean during check-out
Sven originally raised this issue:
If you have a submodule checked out and you go back (or
forward) to a revision of the supermodule that contains a
different revision of the submodule and then switch to
another revision, it will complain that the submodule is not
uptodate, because git simply didn't update the submodule in
the first move.
The current policy is to consider it is perfectly normal that
checked-out submodule is out-of-sync wrt the supermodule index.
At least until we introduce a superproject repository
configuration option that says "in this repository, I do care
about this submodule and at any time I move around in the
superproject, recursively check out the submodule to match", it
is a reasonable policy, as we currently do not recursively
checkout the submodules at all. The most extreme case of this
policy is that the superproject index knows about the submodule
but the subdirectory does not even have to be checked out.
The function verify_uptodate(), called during the two-way merge
aka branch switching, is about "make sure the filesystem entity
that corresponds to this cache entry is up to date, lest we lose
the local modifications". As we explicitly allow submodule
checkout to drift from the supermodule index entry, the check
should say "Ok, for submodules, not matching is the norm" for
now.
Later when we have the ability to mark "I care about this
submodule to be always in sync with the superproject" (thereby
implementing automatic recursive checkout and perhaps diff,
among other things), we should check if the submodule in
question is marked as such and perform the current test.
Acked-by: Lars Hjemli <hjemli@gmail.com>
Acked-by: Sven Verdoolaege <skimo@kotnet.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-04 09:13:09 +04:00
|
|
|
*/
|
2008-01-15 03:03:17 +03:00
|
|
|
if (S_ISGITLINK(ce->ce_mode))
|
2008-02-07 19:39:48 +03:00
|
|
|
return 0;
|
2017-03-15 00:46:39 +03:00
|
|
|
|
2006-07-30 22:26:15 +04:00
|
|
|
errno = 0;
|
|
|
|
}
|
|
|
|
if (errno == ENOENT)
|
2008-02-07 19:39:48 +03:00
|
|
|
return 0;
|
2019-03-22 12:31:35 +03:00
|
|
|
return add_rejected_path(o, error_type, ce->name);
|
2009-08-20 17:47:07 +04:00
|
|
|
}
|
|
|
|
|
2018-04-19 20:58:12 +03:00
|
|
|
int verify_uptodate(const struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2009-08-20 17:47:07 +04:00
|
|
|
{
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
if (!o->skip_sparse_checkout && (ce->ce_flags & CE_NEW_SKIP_WORKTREE))
|
2009-08-20 17:47:10 +04:00
|
|
|
return 0;
|
2010-08-11 12:38:06 +04:00
|
|
|
return verify_uptodate_1(ce, o, ERROR_NOT_UPTODATE_FILE);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_uptodate_sparse(const struct cache_entry *ce,
|
2009-08-20 17:47:09 +04:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2020-03-27 03:48:56 +03:00
|
|
|
return verify_uptodate_1(ce, o, WARNING_SPARSE_NOT_UPTODATE_FILE);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
2018-08-13 19:14:26 +03:00
|
|
|
/*
|
|
|
|
* TODO: We should actually invalidate o->result, not src_index [1].
|
|
|
|
* But since cache tree and untracked cache both are not copied to
|
|
|
|
* o->result until unpacking is complete, we invalidate them on
|
|
|
|
* src_index instead with the assumption that they will be copied to
|
|
|
|
* dst_index at the end.
|
|
|
|
*
|
|
|
|
* [1] src_index->cache_tree is also used in unpack_callback() so if
|
|
|
|
* we invalidate o->result, we need to update it to use
|
|
|
|
* o->result.cache_tree as well.
|
|
|
|
*/
|
2013-06-02 19:46:55 +04:00
|
|
|
static void invalidate_ce_path(const struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2015-03-08 13:12:35 +03:00
|
|
|
if (!ce)
|
|
|
|
return;
|
|
|
|
cache_tree_invalidate_path(o->src_index, ce->name);
|
dir.c: ignore paths containing .git when invalidating untracked cache
read_directory() code ignores all paths named ".git" even if it's not
a valid git repository. See treat_path() for details. Since ".git" is
basically invisible to read_directory(), when we are asked to
invalidate a path that contains ".git", we can safely ignore it
because the slow path would not consider it anyway.
This helps when fsmonitor is used and we have a real ".git" repo at
worktree top. Occasionally .git/index will be updated and if the
fsmonitor hook does not filter it, untracked cache is asked to
invalidate the path ".git/index".
Without this patch, we invalidate the root directory unncessarily,
which:
- makes read_directory() fall back to slow path for root directory
(slower)
- makes the index dirty (because UNTR extension is updated). Depending
on the index size, writing it down could also be slow.
A note about the new "safe_path" knob. Since this new check could be
relatively expensive, avoid it when we know it's not needed. If the
path comes from the index, it can't contain ".git". If it does
contain, we may be screwed up at many more levels, not just this one.
Noticed-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-02-07 12:21:40 +03:00
|
|
|
untracked_cache_invalidate_path(o->src_index, ce->name, 1);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
2007-07-17 22:28:28 +04:00
|
|
|
/*
|
|
|
|
* Check that checking out ce->sha1 in subdir ce->name is not
|
|
|
|
* going to overwrite any working files.
|
|
|
|
*/
|
2017-03-15 00:46:38 +03:00
|
|
|
static int verify_clean_submodule(const char *old_sha1,
|
|
|
|
const struct cache_entry *ce,
|
2013-06-02 19:46:55 +04:00
|
|
|
struct unpack_trees_options *o)
|
2007-07-17 22:28:28 +04:00
|
|
|
{
|
2017-03-15 00:46:39 +03:00
|
|
|
if (!submodule_from_ce(ce))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return check_submodule_move_head(ce, old_sha1,
|
|
|
|
oid_to_hex(&ce->oid), o);
|
2007-07-17 22:28:28 +04:00
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_clean_subdirectory(const struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2007-03-16 09:25:22 +03:00
|
|
|
{
|
|
|
|
/*
|
2007-07-17 22:28:28 +04:00
|
|
|
* we are about to extract "ce->name"; we would not want to lose
|
2007-03-16 09:25:22 +03:00
|
|
|
* anything in the existing directory there.
|
|
|
|
*/
|
|
|
|
int namelen;
|
2009-01-01 23:54:33 +03:00
|
|
|
int i;
|
2007-03-16 09:25:22 +03:00
|
|
|
struct dir_struct d;
|
|
|
|
char *pathbuf;
|
|
|
|
int cnt = 0;
|
2007-07-17 22:28:28 +04:00
|
|
|
|
2017-03-15 00:46:38 +03:00
|
|
|
if (S_ISGITLINK(ce->ce_mode)) {
|
2017-10-16 01:07:06 +03:00
|
|
|
struct object_id oid;
|
refs: convert resolve_gitlink_ref to struct object_id
Convert the declaration and definition of resolve_gitlink_ref to use
struct object_id and apply the following semantic patch:
@@
expression E1, E2, E3;
@@
- resolve_gitlink_ref(E1, E2, E3.hash)
+ resolve_gitlink_ref(E1, E2, &E3)
@@
expression E1, E2, E3;
@@
- resolve_gitlink_ref(E1, E2, E3->hash)
+ resolve_gitlink_ref(E1, E2, E3)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-16 01:07:07 +03:00
|
|
|
int sub_head = resolve_gitlink_ref(ce->name, "HEAD", &oid);
|
2017-03-15 00:46:38 +03:00
|
|
|
/*
|
|
|
|
* If we are not going to update the submodule, then
|
2007-07-17 22:28:28 +04:00
|
|
|
* we don't care.
|
|
|
|
*/
|
convert "oidcmp() == 0" to oideq()
Using the more restrictive oideq() should, in the long run,
give the compiler more opportunities to optimize these
callsites. For now, this conversion should be a complete
noop with respect to the generated code.
The result is also perhaps a little more readable, as it
avoids the "zero is equal" idiom. Since it's so prevalent in
C, I think seasoned programmers tend not to even notice it
anymore, but it can sometimes make for awkward double
negations (e.g., we can drop a few !!oidcmp() instances
here).
This patch was generated almost entirely by the included
coccinelle patch. This mechanical conversion should be
completely safe, because we check explicitly for cases where
oidcmp() is compared to 0, which is what oideq() is doing
under the hood. Note that we don't have to catch "!oidcmp()"
separately; coccinelle's standard isomorphisms make sure the
two are treated equivalently.
I say "almost" because I did hand-edit the coccinelle output
to fix up a few style violations (it mostly keeps the
original formatting, but sometimes unwraps long lines).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-29 00:22:40 +03:00
|
|
|
if (!sub_head && oideq(&oid, &ce->oid))
|
2007-07-17 22:28:28 +04:00
|
|
|
return 0;
|
2017-10-16 01:07:06 +03:00
|
|
|
return verify_clean_submodule(sub_head ? NULL : oid_to_hex(&oid),
|
2019-03-20 11:15:27 +03:00
|
|
|
ce, o);
|
2007-07-17 22:28:28 +04:00
|
|
|
}
|
2007-03-16 09:25:22 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* First let's make sure we do not have a local modification
|
|
|
|
* in that directory.
|
|
|
|
*/
|
2012-07-06 20:07:30 +04:00
|
|
|
namelen = ce_namelen(ce);
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
for (i = locate_in_src_index(ce, o);
|
|
|
|
i < o->src_index->cache_nr;
|
|
|
|
i++) {
|
2009-01-01 23:54:32 +03:00
|
|
|
struct cache_entry *ce2 = o->src_index->cache[i];
|
|
|
|
int len = ce_namelen(ce2);
|
2007-03-16 09:25:22 +03:00
|
|
|
if (len < namelen ||
|
2009-01-01 23:54:32 +03:00
|
|
|
strncmp(ce->name, ce2->name, namelen) ||
|
|
|
|
ce2->name[namelen] != '/')
|
2007-03-16 09:25:22 +03:00
|
|
|
break;
|
|
|
|
/*
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
* ce2->name is an entry in the subdirectory to be
|
|
|
|
* removed.
|
2007-03-16 09:25:22 +03:00
|
|
|
*/
|
2009-01-01 23:54:32 +03:00
|
|
|
if (!ce_stage(ce2)) {
|
|
|
|
if (verify_uptodate(ce2, o))
|
2008-02-07 19:39:48 +03:00
|
|
|
return -1;
|
2009-01-01 23:54:32 +03:00
|
|
|
add_entry(o, ce2, CE_REMOVE, 0);
|
2018-08-18 17:41:27 +03:00
|
|
|
invalidate_ce_path(ce, o);
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 01:59:54 +03:00
|
|
|
mark_ce_used(ce2, o);
|
2007-03-16 09:25:22 +03:00
|
|
|
}
|
|
|
|
cnt++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Then we need to make sure that we do not lose a locally
|
|
|
|
* present file that is not ignored.
|
|
|
|
*/
|
2015-09-25 00:07:03 +03:00
|
|
|
pathbuf = xstrfmt("%.*s/", namelen, ce->name);
|
2007-03-16 09:25:22 +03:00
|
|
|
|
|
|
|
memset(&d, 0, sizeof(d));
|
|
|
|
if (o->dir)
|
|
|
|
d.exclude_per_dir = o->dir->exclude_per_dir;
|
2018-08-13 19:14:29 +03:00
|
|
|
i = read_directory(&d, o->src_index, pathbuf, namelen+1, NULL);
|
2007-03-16 09:25:22 +03:00
|
|
|
if (i)
|
2019-03-22 12:31:35 +03:00
|
|
|
return add_rejected_path(o, ERROR_NOT_UPTODATE_DIR, ce->name);
|
2007-03-16 09:25:22 +03:00
|
|
|
free(pathbuf);
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2008-03-22 19:35:59 +03:00
|
|
|
/*
|
|
|
|
* This gets called when there was no index entry for the tree entry 'dst',
|
|
|
|
* but we found a file in the working tree that 'lstat()' said was fine,
|
|
|
|
* and we're on a case-insensitive filesystem.
|
|
|
|
*
|
|
|
|
* See if we can find a case-insensitive match in the index that also
|
|
|
|
* matches the stat information, and assume it's that other file!
|
|
|
|
*/
|
2010-10-09 17:52:58 +04:00
|
|
|
static int icase_exists(struct unpack_trees_options *o, const char *name, int len, struct stat *st)
|
2008-03-22 19:35:59 +03:00
|
|
|
{
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valueable than the summary above. But if
anyone wants to re-identify the above sites. Applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 19:29:00 +04:00
|
|
|
const struct cache_entry *src;
|
2008-03-22 19:35:59 +03:00
|
|
|
|
2013-09-17 11:06:15 +04:00
|
|
|
src = index_file_exists(o->src_index, name, len, 1);
|
2009-12-14 14:43:58 +03:00
|
|
|
return src && !ie_match_stat(o->src_index, src, st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE);
|
2008-03-22 19:35:59 +03:00
|
|
|
}
|
|
|
|
|
2010-10-09 17:52:58 +04:00
|
|
|
static int check_ok_to_remove(const char *name, int len, int dtype,
|
2013-06-02 19:46:55 +04:00
|
|
|
const struct cache_entry *ce, struct stat *st,
|
2010-10-09 17:52:58 +04:00
|
|
|
enum unpack_trees_error_types error_type,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valueable than the summary above. But if
anyone wants to re-identify the above sites. Applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 19:29:00 +04:00
|
|
|
const struct cache_entry *result;
|
2010-10-09 17:52:58 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* It may be that the 'lstat()' succeeded even though
|
|
|
|
* target 'ce' was absent, because there is an old
|
|
|
|
* entry that is different only in case..
|
|
|
|
*
|
|
|
|
* Ignore that lstat() if it matches.
|
|
|
|
*/
|
|
|
|
if (ignore_case && icase_exists(o, name, len, st))
|
|
|
|
return 0;
|
|
|
|
|
2012-06-06 09:21:42 +04:00
|
|
|
if (o->dir &&
|
2018-08-13 19:14:29 +03:00
|
|
|
is_excluded(o->dir, o->src_index, name, &dtype))
|
2010-10-09 17:52:58 +04:00
|
|
|
/*
|
|
|
|
* ce->name is explicitly excluded, so it is Ok to
|
|
|
|
* overwrite it.
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
if (S_ISDIR(st->st_mode)) {
|
|
|
|
/*
|
|
|
|
* We are checking out path "foo" and
|
|
|
|
* found "foo/." in the working tree.
|
|
|
|
* This is tricky -- if we have modified
|
|
|
|
* files that are in "foo/" we would lose
|
|
|
|
* them.
|
|
|
|
*/
|
2019-03-20 11:15:27 +03:00
|
|
|
if (verify_clean_subdirectory(ce, o) < 0)
|
2010-10-09 17:52:58 +04:00
|
|
|
return -1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The previous round may already have decided to
|
|
|
|
* delete this path, which is in a subdirectory that
|
|
|
|
* is being replaced with a blob.
|
|
|
|
*/
|
2013-09-17 11:06:15 +04:00
|
|
|
result = index_file_exists(&o->result, name, len, 0);
|
2010-10-09 17:52:58 +04:00
|
|
|
if (result) {
|
|
|
|
if (result->ce_flags & CE_REMOVE)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-03-22 12:31:35 +03:00
|
|
|
return add_rejected_path(o, error_type, name);
|
2010-10-09 17:52:58 +04:00
|
|
|
}
|
|
|
|
|
2006-07-30 22:26:15 +04:00
|
|
|
/*
|
|
|
|
* We do not want to remove or overwrite a working tree file that
|
2006-12-05 03:00:46 +03:00
|
|
|
* is not tracked, unless it is ignored.
|
2006-07-30 22:26:15 +04:00
|
|
|
*/
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_absent_1(const struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types error_type,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2010-10-09 17:53:00 +04:00
|
|
|
int len;
|
2006-07-30 22:26:15 +04:00
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (o->index_only || o->reset || !o->update)
|
2008-02-07 19:39:48 +03:00
|
|
|
return 0;
|
2007-03-16 09:25:22 +03:00
|
|
|
|
checkout: don't follow symlinks when removing entries
At 1d718a5108 ("do not overwrite untracked symlinks", 2011-02-20),
symlink.c:check_leading_path() started returning different codes for
FL_ENOENT and FL_SYMLINK. But one of its callers, unlink_entry(), was
not adjusted for this change, so it started to follow symlinks on the
leading path of to-be-removed entries. Fix that and add a regression
test.
Note that since 1d718a5108 check_leading_path() no longer differentiates
the case where it found a symlink in the path's leading components from
the cases where it found a regular file or failed to lstat() the
component. So, a side effect of this current patch is that
unlink_entry() now returns early in all of these three cases. And
because we no longer try to unlink such paths, we also don't get the
warning from remove_or_warn().
For the regular file and symlink cases, it's questionable whether the
warning was useful in the first place: unlink_entry() removes tracked
paths that should no longer be present in the state we are checking out
to. If the path had its leading dir replaced by another file, it means
that the basename already doesn't exist, so there is no need for a
warning. Sure, we are leaving a regular file or symlink behind at the
path's dirname, but this file is either untracked now (so again, no
need to warn), or it will be replaced by a tracked file during the next
phase of this checkout operation.
As for failing to lstat() one of the leading components, the basename
might still exist only we cannot unlink it (e.g. due to the lack of the
required permissions). Since the user expect it to be removed
(especially with checkout's --no-overlay option), add back the warning
in this more relevant case.
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-03-18 21:43:47 +03:00
|
|
|
len = check_leading_path(ce->name, ce_namelen(ce), 0);
|
2010-10-09 17:53:00 +04:00
|
|
|
if (!len)
|
2008-02-07 19:39:48 +03:00
|
|
|
return 0;
|
2010-10-09 17:53:00 +04:00
|
|
|
else if (len > 0) {
|
verify_absent: allow filenames longer than PATH_MAX
When unpack-trees wants to know whether a path will
overwrite anything in the working tree, we use lstat() to
see if there is anything there. But if we are going to write
"foo/bar", we can't just lstat("foo/bar"); we need to look
for leading prefixes (e.g., "foo"). So we use the lstat cache
to find the length of the leading prefix, and copy the
filename up to that length into a temporary buffer (since
the original name is const, we cannot just stick a NUL in
it).
The copy we make goes into a PATH_MAX-sized buffer, which
will overflow if the prefix is longer than PATH_MAX. How
this happens is a little tricky, since in theory PATH_MAX is
the biggest path we will have read from the filesystem. But
this can happen if:
- the compiled-in PATH_MAX does not accurately reflect
what the filesystem is capable of
- the leading prefix is not _quite_ what is on disk; it
contains the next element from the name we are checking.
So if we want to write "aaa/bbb/ccc/ddd" and "aaa/bbb"
exists, the prefix of interest is "aaa/bbb/ccc". If
"aaa/bbb" approaches PATH_MAX, then "ccc" can overflow
it.
So this can be triggered, but it's hard to do. In
particular, you cannot just "git clone" a bogus repo. The
verify_absent checks happen before unpack-trees writes
anything to the filesystem, so there are never any leading
prefixes during the initial checkout, and the bug doesn't
trigger. And by definition, these files are larger than
PATH_MAX, so writing them will fail, and clone will
complain (though it may write a partial path, which will
cause a subsequent "git checkout" to hit the bug).
We can fix it by creating the temporary path on the heap.
The extra malloc overhead is not important, as we are
already making at least one stat() call (and probably more
for the prefix discovery).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-19 21:12:37 +03:00
|
|
|
char *path;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
path = xmemdupz(ce->name, len);
|
2011-01-13 05:28:09 +03:00
|
|
|
if (lstat(path, &st))
|
2016-05-08 12:47:58 +03:00
|
|
|
ret = error_errno("cannot stat '%s'", path);
|
2017-03-15 00:46:39 +03:00
|
|
|
else {
|
|
|
|
if (submodule_from_ce(ce))
|
|
|
|
ret = check_submodule_move_head(ce,
|
|
|
|
oid_to_hex(&ce->oid),
|
|
|
|
NULL, o);
|
|
|
|
else
|
|
|
|
ret = check_ok_to_remove(path, len, DT_UNKNOWN, NULL,
|
|
|
|
&st, error_type, o);
|
|
|
|
}
|
verify_absent: allow filenames longer than PATH_MAX
When unpack-trees wants to know whether a path will
overwrite anything in the working tree, we use lstat() to
see if there is anything there. But if we are going to write
"foo/bar", we can't just lstat("foo/bar"); we need to look
for leading prefixes (e.g., "foo"). So we use the lstat cache
to find the length of the leading prefix, and copy the
filename up to that length into a temporary buffer (since
the original name is const, we cannot just stick a NUL in
it).
The copy we make goes into a PATH_MAX-sized buffer, which
will overflow if the prefix is longer than PATH_MAX. How
this happens is a little tricky, since in theory PATH_MAX is
the biggest path we will have read from the filesystem. But
this can happen if:
- the compiled-in PATH_MAX does not accurately reflect
what the filesystem is capable of
- the leading prefix is not _quite_ what is on disk; it
contains the next element from the name we are checking.
So if we want to write "aaa/bbb/ccc/ddd" and "aaa/bbb"
exists, the prefix of interest is "aaa/bbb/ccc". If
"aaa/bbb" approaches PATH_MAX, then "ccc" can overflow
it.
So this can be triggered, but it's hard to do. In
particular, you cannot just "git clone" a bogus repo. The
verify_absent checks happen before unpack-trees writes
anything to the filesystem, so there are never any leading
prefixes during the initial checkout, and the bug doesn't
trigger. And by definition, these files are larger than
PATH_MAX, so writing them will fail, and clone will
complain (though it may write a partial path, which will
cause a subsequent "git checkout" to hit the bug).
We can fix it by creating the temporary path on the heap.
The extra malloc overhead is not important, as we are
already making at least one stat() call (and probably more
for the prefix discovery).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-19 21:12:37 +03:00
|
|
|
free(path);
|
|
|
|
return ret;
|
2011-01-13 05:26:36 +03:00
|
|
|
} else if (lstat(ce->name, &st)) {
|
|
|
|
if (errno != ENOENT)
|
2016-05-08 12:47:58 +03:00
|
|
|
return error_errno("cannot stat '%s'", ce->name);
|
2011-01-13 05:26:36 +03:00
|
|
|
return 0;
|
|
|
|
} else {
|
2017-03-15 00:46:39 +03:00
|
|
|
if (submodule_from_ce(ce))
|
|
|
|
return check_submodule_move_head(ce, oid_to_hex(&ce->oid),
|
|
|
|
NULL, o);
|
|
|
|
|
2010-10-09 17:52:58 +04:00
|
|
|
return check_ok_to_remove(ce->name, ce_namelen(ce),
|
2011-01-13 05:26:36 +03:00
|
|
|
ce_to_dtype(ce), ce, &st,
|
|
|
|
error_type, o);
|
|
|
|
}
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
2010-10-09 17:52:58 +04:00
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_absent(const struct cache_entry *ce,
|
2010-08-11 12:38:06 +04:00
|
|
|
enum unpack_trees_error_types error_type,
|
2009-08-20 17:47:07 +04:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
if (!o->skip_sparse_checkout && (ce->ce_flags & CE_NEW_SKIP_WORKTREE))
|
2009-08-20 17:47:10 +04:00
|
|
|
return 0;
|
2010-08-11 12:38:06 +04:00
|
|
|
return verify_absent_1(ce, error_type, o);
|
2009-08-20 17:47:07 +04:00
|
|
|
}
|
2006-07-30 22:26:15 +04:00
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int verify_absent_sparse(const struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types error_type,
|
|
|
|
struct unpack_trees_options *o)
|
2009-08-20 17:47:09 +04:00
|
|
|
{
|
2020-03-27 03:48:46 +03:00
|
|
|
return verify_absent_1(ce, error_type, o);
|
2009-08-20 17:47:09 +04:00
|
|
|
}
|
2006-07-30 22:26:15 +04:00
|
|
|
|
2013-06-02 19:46:54 +04:00
|
|
|
static int merged_entry(const struct cache_entry *ce,
|
2013-06-02 19:46:55 +04:00
|
|
|
const struct cache_entry *old,
|
2013-06-02 19:46:54 +04:00
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2008-03-16 21:42:50 +03:00
|
|
|
int update = CE_UPDATE;
|
block alloc: allocate cache entries from mem_pool
When reading large indexes from disk, a portion of the time is
dominated in malloc() calls. This can be mitigated by allocating a
large block of memory and manage it ourselves via memory pools.
This change moves the cache entry allocation to be on top of memory
pools.
Design:
The index_state struct will gain a notion of an associated memory_pool
from which cache_entries will be allocated from. When reading in the
index from disk, we have information on the number of entries and
their size, which can guide us in deciding how large our initial
memory allocation should be. When an index is discarded, the
associated memory_pool will be discarded as well - so the lifetime of
a cache_entry is tied to the lifetime of the index_state that it was
allocated for.
In the case of a Split Index, the following rules are followed. 1st,
some terminology is defined:
Terminology:
- 'the_index': represents the logical view of the index
- 'split_index': represents the "base" cache entries. Read from the
split index file.
'the_index' can reference a single split_index, as well as
cache_entries from the split_index. `the_index` will be discarded
before the `split_index` is. This means that when we are allocating
cache_entries in the presence of a split index, we need to allocate
the entries from the `split_index`'s memory pool. This allows us to
follow the pattern that `the_index` can reference cache_entries from
the `split_index`, and that the cache_entries will not be freed while
they are still being referenced.
Managing transient cache_entry structs:
Cache entries are usually allocated for an index, but this is not always
the case. Cache entries are sometimes allocated because this is the
type that the existing checkout_entry function works with. Because of
this, the existing code needs to handle cache entries associated with an
index / memory pool, and those that only exist transiently. Several
strategies were contemplated around how to handle this:
Chosen approach:
An extra field was added to the cache_entry type to track whether the
cache_entry was allocated from a memory pool or not. This is currently
an int field, as there are no more available bits in the existing
ce_flags bit field. If / when more bits are needed, this new field can
be turned into a proper bit field.
Alternatives:
1) Do not include any information about how the cache_entry was
allocated. Calling code would be responsible for tracking whether the
cache_entry needed to be freed or not.
Pro: No extra memory overhead to track this state
Con: Extra complexity in callers to handle this correctly.
The extra complexity and burden to not regress this behavior in the
future was more than we wanted.
2) cache_entry would gain knowledge about which mem_pool allocated it
Pro: Could (potentially) do extra logic to know when a mem_pool no
longer had references to any cache_entry
Con: cache_entry would grow heavier by a pointer, instead of int
We didn't see a tangible benefit to this approach
3) Do not add any extra information to a cache_entry, but when freeing a
cache entry, check if the memory exists in a region managed by existing
mem_pools.
Pro: No extra memory overhead to track state
Con: Extra computation is performed when freeing cache entries
We decided tracking and iterating over known memory pool regions was
less desirable than adding an extra field to track this stae.
Signed-off-by: Jameson Miller <jamill@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-02 22:49:37 +03:00
|
|
|
struct cache_entry *merge = dup_cache_entry(ce, &o->result);
|
2008-03-16 21:42:50 +03:00
|
|
|
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 10:04:04 +03:00
|
|
|
if (!old) {
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
/*
|
|
|
|
* New index entries. In sparse checkout, the following
|
|
|
|
* verify_absent() will be delayed until after
|
|
|
|
* traverse_trees() finishes in unpack_trees(), then:
|
|
|
|
*
|
|
|
|
* - CE_NEW_SKIP_WORKTREE will be computed correctly
|
|
|
|
* - verify_absent() be called again, this time with
|
|
|
|
* correct CE_NEW_SKIP_WORKTREE
|
|
|
|
*
|
|
|
|
* verify_absent() call here does nothing in sparse
|
|
|
|
* checkout (i.e. o->skip_sparse_checkout == 0)
|
|
|
|
*/
|
|
|
|
update |= CE_ADDED;
|
|
|
|
merge->ce_flags |= CE_NEW_SKIP_WORKTREE;
|
|
|
|
|
2013-06-02 19:46:54 +04:00
|
|
|
if (verify_absent(merge,
|
|
|
|
ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o)) {
|
2018-07-02 22:49:31 +03:00
|
|
|
discard_cache_entry(merge);
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 10:04:04 +03:00
|
|
|
return -1;
|
2013-06-02 19:46:54 +04:00
|
|
|
}
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 10:04:04 +03:00
|
|
|
invalidate_ce_path(merge, o);
|
2017-03-15 00:46:39 +03:00
|
|
|
|
unpack-trees: check for missing submodule directory in merged_entry
Using `git checkout --recurse-submodules` to switch between a
branch with no submodules and a branch with initialized nested
submodules currently causes a fatal error:
$ git checkout --recurse-submodules branch-with-nested-submodules
fatal: exec '--super-prefix=submodule/nested/': cd to 'nested'
failed: No such file or directory
error: Submodule 'nested' could not be updated.
error: Submodule 'submodule/nested' cannot checkout new HEAD.
error: Submodule 'submodule' could not be updated.
M submodule
Switched to branch 'branch-with-nested-submodules'
The checkout succeeds but the worktree and index of the first level
submodule are left empty:
$ cd submodule
$ git -c status.submoduleSummary=1 status
HEAD detached at b3ce885
Changes to be committed:
(use "git restore --staged <file>..." to unstage)
deleted: .gitmodules
deleted: first.t
deleted: nested
fatal: not a git repository: 'nested/.git'
Submodule changes to be committed:
* nested 1e96f59...0000000:
$ git ls-files -s
$ # empty
$ ls -A
.git
The reason for the fatal error during the checkout is that a child git
process tries to cd into the yet unexisting nested submodule directory.
The sequence is the following:
1. The main git process (the one running in the superproject) eventually
reaches write_entry() in entry.c, which creates the first level
submodule directory and then calls submodule_move_head() in submodule.c,
which spawns `git read-tree` in the submodule directory.
2. The first child git process (the one in the submodule of the
superproject) eventually calls check_submodule_move_head() at
unpack_trees.c:2021, which calls submodule_move_head in dry-run mode,
which spawns `git read-tree` in the nested submodule directory.
3. The second child git process tries to chdir() in the yet unexisting
nested submodule directory in start_command() at run-command.c:829 and
dies before exec'ing.
The reason why check_submodule_move_head() is reached in the first child
and not in the main process is that it is inside an
if(submodule_from_ce()) construct, and submodule_from_ce() returns a
valid struct submodule pointer, whereas it returns a null pointer in the
main git process.
The reason why submodule_from_ce() returns a null pointer in the main
git process is because the call to cache_lookup_path() in config_from()
(called from submodule_from_path() in submodule_from_ce()) returns a
null pointer since the hashmap "for_path" in the submodule_cache of
the_repository is not yet populated. It is not populated because both
repo_get_oid(repo, GITMODULES_INDEX, &oid) and repo_get_oid(repo,
GITMODULES_HEAD, &oid) in config_from_gitmodules() at
submodule-config.c:639-640 return -1, as at this stage of the operation,
neither the HEAD of the superproject nor its index contain any
.gitmodules file.
In contrast, in the first child the hashmap is populated because
repo_get_oid(repo, GITMODULES_HEAD, &oid) returns 0 as the HEAD of the
first level submodule, i.e. .git/modules/submodule/HEAD, points to a
commit where .gitmodules is present and records 'nested' as a submodule.
Fix this bug by checking that the submodule directory exists before
calling check_submodule_move_head() in merged_entry() in the `if(!old)`
branch, i.e. if going from a commit with no submodule to a commit with a
submodule present.
Also protect the other call to check_submodule_move_head() in
merged_entry() the same way as it is safer, even though the `else if
(!(old->ce_flags & CE_CONFLICTED))` branch of the code is not at play in
the present bug.
The other calls to check_submodule_move_head() in other functions in
unpack_trees.c are all already protected by calls to lstat() somewhere
in
the program flow so we don't need additional protection for them.
All commands in the unpack_trees machinery are affected, i.e. checkout,
reset and read-tree when called with the --recurse-submodules flag.
This bug was first reported in [1].
[1]
https://lore.kernel.org/git/7437BB59-4605-48EC-B05E-E2BDB2D9DABC@gmail.com/
Reported-by: Philippe Blain <levraiphilippeblain@gmail.com>
Reported-by: Damien Robert <damien.olivier.robert@gmail.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-17 07:53:05 +03:00
|
|
|
if (submodule_from_ce(ce) && file_exists(ce->name)) {
|
2017-03-15 00:46:39 +03:00
|
|
|
int ret = check_submodule_move_head(ce, NULL,
|
|
|
|
oid_to_hex(&ce->oid),
|
|
|
|
o);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 10:04:04 +03:00
|
|
|
} else if (!(old->ce_flags & CE_CONFLICTED)) {
|
2006-07-30 22:26:15 +04:00
|
|
|
/*
|
|
|
|
* See if we can re-use the old CE directly?
|
|
|
|
* That way we get the uptodate stat info.
|
|
|
|
*
|
2008-03-16 21:42:50 +03:00
|
|
|
* This also removes the UPDATE flag on a match; otherwise
|
|
|
|
* we will end up overwriting local changes in the work tree.
|
2006-07-30 22:26:15 +04:00
|
|
|
*/
|
|
|
|
if (same(old, merge)) {
|
2008-02-23 07:41:17 +03:00
|
|
|
copy_cache_entry(merge, old);
|
2008-03-16 21:42:50 +03:00
|
|
|
update = 0;
|
2006-07-30 22:26:15 +04:00
|
|
|
} else {
|
2013-06-02 19:46:54 +04:00
|
|
|
if (verify_uptodate(old, o)) {
|
2018-07-02 22:49:31 +03:00
|
|
|
discard_cache_entry(merge);
|
2008-02-07 19:39:48 +03:00
|
|
|
return -1;
|
2013-06-02 19:46:54 +04:00
|
|
|
}
|
unpack-trees: move all skip-worktree checks back to unpack_trees()
Earlier, the will_have_skip_worktree() checks are done in various
places, which makes it hard to traverse the index tree-alike, required
by excluded_from_list(). This patch moves all the checks into two
loops in unpack_trees().
Entries in index in this operation can be classified into two
groups: ones already in index before unpack_trees() is called and ones
added to index after traverse_trees() is called.
In both groups, before checking file status on worktree, the future
skip-worktree bit must be checked, so that if an entry will be outside
worktree, worktree should not be checked.
For the first group, the future skip-worktree bit is precomputed and
stored as CE_NEW_SKIP_WORKTREE in the first loop before
traverse_trees() is called so that *way_merge() function does not need
to compute it again.
For the second group, because we don't know what entries will be in
this group until traverse_trees() finishes, operations that need
future skip-worktree check is delayed until CE_NEW_SKIP_WORKTREE is
computed in the second loop. CE_ADDED is used to mark entries in the
second group.
CE_ADDED and CE_NEW_SKIP_WORKTREE are temporary flags used in
unpack_trees(). CE_ADDED is only used by add_to_index(), which should
not be called while unpack_trees() is running.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-27 09:24:04 +03:00
|
|
|
/* Migrate old flags over */
|
|
|
|
update |= old->ce_flags & (CE_SKIP_WORKTREE | CE_NEW_SKIP_WORKTREE);
|
2008-03-06 23:26:14 +03:00
|
|
|
invalidate_ce_path(old, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
2017-03-15 00:46:39 +03:00
|
|
|
|
unpack-trees: check for missing submodule directory in merged_entry
Using `git checkout --recurse-submodules` to switch between a
branch with no submodules and a branch with initialized nested
submodules currently causes a fatal error:
$ git checkout --recurse-submodules branch-with-nested-submodules
fatal: exec '--super-prefix=submodule/nested/': cd to 'nested'
failed: No such file or directory
error: Submodule 'nested' could not be updated.
error: Submodule 'submodule/nested' cannot checkout new HEAD.
error: Submodule 'submodule' could not be updated.
M submodule
Switched to branch 'branch-with-nested-submodules'
The checkout succeeds but the worktree and index of the first level
submodule are left empty:
$ cd submodule
$ git -c status.submoduleSummary=1 status
HEAD detached at b3ce885
Changes to be committed:
(use "git restore --staged <file>..." to unstage)
deleted: .gitmodules
deleted: first.t
deleted: nested
fatal: not a git repository: 'nested/.git'
Submodule changes to be committed:
* nested 1e96f59...0000000:
$ git ls-files -s
$ # empty
$ ls -A
.git
The reason for the fatal error during the checkout is that a child git
process tries to cd into the yet unexisting nested submodule directory.
The sequence is the following:
1. The main git process (the one running in the superproject) eventually
reaches write_entry() in entry.c, which creates the first level
submodule directory and then calls submodule_move_head() in submodule.c,
which spawns `git read-tree` in the submodule directory.
2. The first child git process (the one in the submodule of the
superproject) eventually calls check_submodule_move_head() at
unpack_trees.c:2021, which calls submodule_move_head in dry-run mode,
which spawns `git read-tree` in the nested submodule directory.
3. The second child git process tries to chdir() in the yet unexisting
nested submodule directory in start_command() at run-command.c:829 and
dies before exec'ing.
The reason why check_submodule_move_head() is reached in the first child
and not in the main process is that it is inside an
if(submodule_from_ce()) construct, and submodule_from_ce() returns a
valid struct submodule pointer, whereas it returns a null pointer in the
main git process.
The reason why submodule_from_ce() returns a null pointer in the main
git process is because the call to cache_lookup_path() in config_from()
(called from submodule_from_path() in submodule_from_ce()) returns a
null pointer since the hashmap "for_path" in the submodule_cache of
the_repository is not yet populated. It is not populated because both
repo_get_oid(repo, GITMODULES_INDEX, &oid) and repo_get_oid(repo,
GITMODULES_HEAD, &oid) in config_from_gitmodules() at
submodule-config.c:639-640 return -1, as at this stage of the operation,
neither the HEAD of the superproject nor its index contain any
.gitmodules file.
In contrast, in the first child the hashmap is populated because
repo_get_oid(repo, GITMODULES_HEAD, &oid) returns 0 as the HEAD of the
first level submodule, i.e. .git/modules/submodule/HEAD, points to a
commit where .gitmodules is present and records 'nested' as a submodule.
Fix this bug by checking that the submodule directory exists before
calling check_submodule_move_head() in merged_entry() in the `if(!old)`
branch, i.e. if going from a commit with no submodule to a commit with a
submodule present.
Also protect the other call to check_submodule_move_head() in
merged_entry() the same way as it is safer, even though the `else if
(!(old->ce_flags & CE_CONFLICTED))` branch of the code is not at play in
the present bug.
The other calls to check_submodule_move_head() in other functions in
unpack_trees.c are all already protected by calls to lstat() somewhere
in
the program flow so we don't need additional protection for them.
All commands in the unpack_trees machinery are affected, i.e. checkout,
reset and read-tree when called with the --recurse-submodules flag.
This bug was first reported in [1].
[1]
https://lore.kernel.org/git/7437BB59-4605-48EC-B05E-E2BDB2D9DABC@gmail.com/
Reported-by: Philippe Blain <levraiphilippeblain@gmail.com>
Reported-by: Damien Robert <damien.olivier.robert@gmail.com>
Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-17 07:53:05 +03:00
|
|
|
if (submodule_from_ce(ce) && file_exists(ce->name)) {
|
2017-03-15 00:46:39 +03:00
|
|
|
int ret = check_submodule_move_head(ce, oid_to_hex(&old->oid),
|
|
|
|
oid_to_hex(&ce->oid),
|
|
|
|
o);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
}
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 10:04:04 +03:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Previously unmerged entry left as an existence
|
|
|
|
* marker by read_index_unmerged();
|
|
|
|
*/
|
|
|
|
invalidate_ce_path(old, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
2019-09-09 14:56:15 +03:00
|
|
|
if (do_add_entry(o, merge, update, CE_STAGEMASK) < 0)
|
|
|
|
return -1;
|
2006-07-30 22:26:15 +04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int deleted_entry(const struct cache_entry *ce,
|
|
|
|
const struct cache_entry *old,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2008-03-07 05:12:28 +03:00
|
|
|
/* Did it exist in the index? */
|
|
|
|
if (!old) {
|
2010-08-11 12:38:06 +04:00
|
|
|
if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o))
|
2008-02-07 19:39:48 +03:00
|
|
|
return -1;
|
2008-03-07 05:12:28 +03:00
|
|
|
return 0;
|
|
|
|
}
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 10:04:04 +03:00
|
|
|
if (!(old->ce_flags & CE_CONFLICTED) && verify_uptodate(old, o))
|
2008-03-07 05:12:28 +03:00
|
|
|
return -1;
|
|
|
|
add_entry(o, ce, CE_REMOVE, 0);
|
2008-03-06 23:26:14 +03:00
|
|
|
invalidate_ce_path(ce, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2013-06-02 19:46:55 +04:00
|
|
|
static int keep_entry(const struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2008-03-07 05:12:28 +03:00
|
|
|
add_entry(o, ce, 0, 0);
|
2018-08-18 17:41:27 +03:00
|
|
|
if (ce_stage(ce))
|
|
|
|
invalidate_ce_path(ce, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if DBRT_DEBUG
|
|
|
|
static void show_stage_entry(FILE *o,
|
|
|
|
const char *label, const struct cache_entry *ce)
|
|
|
|
{
|
|
|
|
if (!ce)
|
|
|
|
fprintf(o, "%s (missing)\n", label);
|
|
|
|
else
|
|
|
|
fprintf(o, "%s%06o %s %d\t%s\n",
|
|
|
|
label,
|
2008-01-15 03:03:17 +03:00
|
|
|
ce->ce_mode,
|
2016-09-05 23:07:52 +03:00
|
|
|
oid_to_hex(&ce->oid),
|
2006-07-30 22:26:15 +04:00
|
|
|
ce_stage(ce),
|
|
|
|
ce->name);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2013-06-02 19:46:56 +04:00
|
|
|
int threeway_merge(const struct cache_entry * const *stages,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2013-06-02 19:46:55 +04:00
|
|
|
const struct cache_entry *index;
|
|
|
|
const struct cache_entry *head;
|
|
|
|
const struct cache_entry *remote = stages[o->head_idx + 1];
|
2006-07-30 22:26:15 +04:00
|
|
|
int count;
|
|
|
|
int head_match = 0;
|
|
|
|
int remote_match = 0;
|
|
|
|
|
|
|
|
int df_conflict_head = 0;
|
|
|
|
int df_conflict_remote = 0;
|
|
|
|
|
|
|
|
int any_anc_missing = 0;
|
|
|
|
int no_anc_exists = 1;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
2007-04-07 16:49:19 +04:00
|
|
|
if (!stages[i] || stages[i] == o->df_conflict_entry)
|
2006-07-30 22:26:15 +04:00
|
|
|
any_anc_missing = 1;
|
2007-04-07 16:49:19 +04:00
|
|
|
else
|
2006-07-30 22:26:15 +04:00
|
|
|
no_anc_exists = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
index = stages[0];
|
|
|
|
head = stages[o->head_idx];
|
|
|
|
|
|
|
|
if (head == o->df_conflict_entry) {
|
|
|
|
df_conflict_head = 1;
|
|
|
|
head = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (remote == o->df_conflict_entry) {
|
|
|
|
df_conflict_remote = 1;
|
|
|
|
remote = NULL;
|
|
|
|
}
|
|
|
|
|
2009-10-12 00:38:11 +04:00
|
|
|
/*
|
|
|
|
* First, if there's a #16 situation, note that to prevent #13
|
2006-07-30 22:26:15 +04:00
|
|
|
* and #14.
|
|
|
|
*/
|
|
|
|
if (!same(remote, head)) {
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
|
|
|
if (same(stages[i], head)) {
|
|
|
|
head_match = i;
|
|
|
|
}
|
|
|
|
if (same(stages[i], remote)) {
|
|
|
|
remote_match = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-10-12 00:38:11 +04:00
|
|
|
/*
|
|
|
|
* We start with cases where the index is allowed to match
|
2006-07-30 22:26:15 +04:00
|
|
|
* something other than the head: #14(ALT) and #2ALT, where it
|
|
|
|
* is permitted to match the result instead.
|
|
|
|
*/
|
|
|
|
/* #14, #14ALT, #2ALT */
|
|
|
|
if (remote && !df_conflict_head && head_match && !remote_match) {
|
|
|
|
if (index && !same(index, remote) && !same(index, head))
|
2014-08-13 04:03:18 +04:00
|
|
|
return reject_merge(index, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
return merged_entry(remote, index, o);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If we have an entry in the index cache, then we want to
|
|
|
|
* make sure that it matches head.
|
|
|
|
*/
|
2008-02-07 19:40:02 +03:00
|
|
|
if (index && !same(index, head))
|
2014-08-13 04:03:18 +04:00
|
|
|
return reject_merge(index, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
|
|
|
|
if (head) {
|
|
|
|
/* #5ALT, #15 */
|
|
|
|
if (same(head, remote))
|
|
|
|
return merged_entry(head, index, o);
|
|
|
|
/* #13, #3ALT */
|
|
|
|
if (!df_conflict_remote && remote_match && !head_match)
|
|
|
|
return merged_entry(head, index, o);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* #1 */
|
2008-03-07 05:12:28 +03:00
|
|
|
if (!head && !remote && any_anc_missing)
|
2006-07-30 22:26:15 +04:00
|
|
|
return 0;
|
|
|
|
|
2009-10-12 00:38:11 +04:00
|
|
|
/*
|
|
|
|
* Under the "aggressive" rule, we resolve mostly trivial
|
2006-07-30 22:26:15 +04:00
|
|
|
* cases that we historically had git-merge-one-file resolve.
|
|
|
|
*/
|
|
|
|
if (o->aggressive) {
|
2009-10-12 00:38:11 +04:00
|
|
|
int head_deleted = !head;
|
|
|
|
int remote_deleted = !remote;
|
2013-06-02 19:46:55 +04:00
|
|
|
const struct cache_entry *ce = NULL;
|
2007-04-07 16:49:19 +04:00
|
|
|
|
|
|
|
if (index)
|
2007-07-17 22:28:28 +04:00
|
|
|
ce = index;
|
2007-04-07 16:49:19 +04:00
|
|
|
else if (head)
|
2007-07-17 22:28:28 +04:00
|
|
|
ce = head;
|
2007-04-07 16:49:19 +04:00
|
|
|
else if (remote)
|
2007-07-17 22:28:28 +04:00
|
|
|
ce = remote;
|
2007-04-07 16:49:19 +04:00
|
|
|
else {
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
|
|
|
if (stages[i] && stages[i] != o->df_conflict_entry) {
|
2007-07-17 22:28:28 +04:00
|
|
|
ce = stages[i];
|
2007-04-07 16:49:19 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-07-30 22:26:15 +04:00
|
|
|
/*
|
|
|
|
* Deleted in both.
|
|
|
|
* Deleted in one and unchanged in the other.
|
|
|
|
*/
|
|
|
|
if ((head_deleted && remote_deleted) ||
|
|
|
|
(head_deleted && remote && remote_match) ||
|
|
|
|
(remote_deleted && head && head_match)) {
|
|
|
|
if (index)
|
|
|
|
return deleted_entry(index, index, o);
|
2008-03-07 05:12:28 +03:00
|
|
|
if (ce && !head_deleted) {
|
2010-08-11 12:38:06 +04:00
|
|
|
if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o))
|
2008-02-07 19:39:48 +03:00
|
|
|
return -1;
|
|
|
|
}
|
2006-07-30 22:26:15 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Added in both, identically.
|
|
|
|
*/
|
|
|
|
if (no_anc_exists && head && remote && same(head, remote))
|
|
|
|
return merged_entry(head, index, o);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Below are "no merge" cases, which require that the index be
|
|
|
|
* up-to-date to avoid the files getting overwritten with
|
|
|
|
* conflict resolution files.
|
|
|
|
*/
|
|
|
|
if (index) {
|
2008-02-07 19:39:48 +03:00
|
|
|
if (verify_uptodate(index, o))
|
|
|
|
return -1;
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
o->nontrivial_merge = 1;
|
|
|
|
|
2007-04-07 16:42:01 +04:00
|
|
|
/* #2, #3, #4, #6, #7, #9, #10, #11. */
|
2006-07-30 22:26:15 +04:00
|
|
|
count = 0;
|
|
|
|
if (!head_match || !remote_match) {
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
2007-04-07 16:49:19 +04:00
|
|
|
if (stages[i] && stages[i] != o->df_conflict_entry) {
|
2007-04-02 11:06:12 +04:00
|
|
|
keep_entry(stages[i], o);
|
2006-07-30 22:26:15 +04:00
|
|
|
count++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#if DBRT_DEBUG
|
|
|
|
else {
|
|
|
|
fprintf(stderr, "read-tree: warning #16 detected\n");
|
|
|
|
show_stage_entry(stderr, "head ", stages[head_match]);
|
|
|
|
show_stage_entry(stderr, "remote ", stages[remote_match]);
|
|
|
|
}
|
|
|
|
#endif
|
2007-04-02 11:06:12 +04:00
|
|
|
if (head) { count += keep_entry(head, o); }
|
|
|
|
if (remote) { count += keep_entry(remote, o); }
|
2006-07-30 22:26:15 +04:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Two-way merge.
|
|
|
|
*
|
|
|
|
* The rule is to "carry forward" what is in the index without losing
|
2009-10-24 12:31:32 +04:00
|
|
|
* information across a "fast-forward", favoring a successful merge
|
2006-07-30 22:26:15 +04:00
|
|
|
* over a merge failure when it makes sense. For details of the
|
|
|
|
* "carry forward" rule, please see <Documentation/git-read-tree.txt>.
|
|
|
|
*
|
|
|
|
*/
|
2013-06-02 19:46:56 +04:00
|
|
|
int twoway_merge(const struct cache_entry * const *src,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2013-06-02 19:46:55 +04:00
|
|
|
const struct cache_entry *current = src[0];
|
|
|
|
const struct cache_entry *oldtree = src[1];
|
|
|
|
const struct cache_entry *newtree = src[2];
|
2006-07-30 22:26:15 +04:00
|
|
|
|
|
|
|
if (o->merge_size != 2)
|
|
|
|
return error("Cannot do a twoway merge of %d trees",
|
|
|
|
o->merge_size);
|
|
|
|
|
2007-04-03 03:29:56 +04:00
|
|
|
if (oldtree == o->df_conflict_entry)
|
|
|
|
oldtree = NULL;
|
|
|
|
if (newtree == o->df_conflict_entry)
|
|
|
|
newtree = NULL;
|
|
|
|
|
2006-07-30 22:26:15 +04:00
|
|
|
if (current) {
|
unpack-trees: fix "read-tree -u --reset A B" with conflicted index
When we call "read-tree --reset -u HEAD ORIG_HEAD", the first thing we
do with the index is to call read_cache_unmerged. Originally that
would read the index, leaving aside any unmerged entries. However, as
of d1a43f2 (reset --hard/read-tree --reset -u: remove unmerged new
paths, 2008-10-15), it actually creates a new cache entry to serve as
a placeholder, so that we later know to update the working tree.
However, we later noticed that the sha1 of that unmerged entry was
just copied from some higher stage, leaving you with random content in
the index. That was fixed by e11d7b5 ("reset --merge": fix unmerged
case, 2009-12-31), which instead puts the null sha1 into the newly
created entry, and sets a CE_CONFLICTED flag. At the same time, it
teaches the unpack-trees machinery to pay attention to this flag, so
that oneway_merge throws away the current value.
However, it did not update the code paths for twoway_merge, which is
where we end up in the two-way read-tree with --reset. We notice that
the HEAD and ORIG_HEAD versions are the same, and say "oh, we can just
reuse the current version". But that's not true. The current version
is bogus.
Notice this case and make sure we do not keep the bogus entry; either
we do not have that path in the tree we are moving to (i.e. remove
it), or we want to have the cache entry we created for the tree we are
moving to (i.e. resolve by explicitly saying the "newtree" version is
what we want).
[jc: this is from the almost year-old $gmane/212316]
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-30 00:51:54 +04:00
|
|
|
if (current->ce_flags & CE_CONFLICTED) {
|
|
|
|
if (same(oldtree, newtree) || o->reset) {
|
|
|
|
if (!newtree)
|
|
|
|
return deleted_entry(current, current, o);
|
|
|
|
else
|
|
|
|
return merged_entry(newtree, current, o);
|
|
|
|
}
|
2014-08-13 04:03:18 +04:00
|
|
|
return reject_merge(current, o);
|
2014-08-13 04:00:45 +04:00
|
|
|
} else if ((!oldtree && !newtree) || /* 4 and 5 */
|
unpack-trees: fix "read-tree -u --reset A B" with conflicted index
When we call "read-tree --reset -u HEAD ORIG_HEAD", the first thing we
do with the index is to call read_cache_unmerged. Originally that
would read the index, leaving aside any unmerged entries. However, as
of d1a43f2 (reset --hard/read-tree --reset -u: remove unmerged new
paths, 2008-10-15), it actually creates a new cache entry to serve as
a placeholder, so that we later know to update the working tree.
However, we later noticed that the sha1 of that unmerged entry was
just copied from some higher stage, leaving you with random content in
the index. That was fixed by e11d7b5 ("reset --merge": fix unmerged
case, 2009-12-31), which instead puts the null sha1 into the newly
created entry, and sets a CE_CONFLICTED flag. At the same time, it
teaches the unpack-trees machinery to pay attention to this flag, so
that oneway_merge throws away the current value.
However, it did not update the code paths for twoway_merge, which is
where we end up in the two-way read-tree with --reset. We notice that
the HEAD and ORIG_HEAD versions are the same, and say "oh, we can just
reuse the current version". But that's not true. The current version
is bogus.
Notice this case and make sure we do not keep the bogus entry; either
we do not have that path in the tree we are moving to (i.e. remove
it), or we want to have the cache entry we created for the tree we are
moving to (i.e. resolve by explicitly saying the "newtree" version is
what we want).
[jc: this is from the almost year-old $gmane/212316]
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-12-30 00:51:54 +04:00
|
|
|
(!oldtree && newtree &&
|
|
|
|
same(current, newtree)) || /* 6 and 7 */
|
|
|
|
(oldtree && newtree &&
|
|
|
|
same(oldtree, newtree)) || /* 14 and 15 */
|
|
|
|
(oldtree && newtree &&
|
|
|
|
!same(oldtree, newtree) && /* 18 and 19 */
|
|
|
|
same(current, newtree))) {
|
2007-04-02 11:06:12 +04:00
|
|
|
return keep_entry(current, o);
|
2014-08-13 04:00:45 +04:00
|
|
|
} else if (oldtree && !newtree && same(current, oldtree)) {
|
2006-07-30 22:26:15 +04:00
|
|
|
/* 10 or 11 */
|
|
|
|
return deleted_entry(oldtree, current, o);
|
2014-08-13 04:00:45 +04:00
|
|
|
} else if (oldtree && newtree &&
|
2006-07-30 22:26:15 +04:00
|
|
|
same(current, oldtree) && !same(current, newtree)) {
|
|
|
|
/* 20 or 21 */
|
|
|
|
return merged_entry(newtree, current, o);
|
2014-08-13 04:00:45 +04:00
|
|
|
} else
|
2014-08-13 04:03:18 +04:00
|
|
|
return reject_merge(current, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
checkout: do not lose staged removal
The logic to checkout a different commit implements the safety to never
lose user's local changes. For example, switching from a commit to
another commit, when you have changed a path that is different between
them, need to merge your changes to the version from the switched-to
commit, which you may not necessarily be able to resolve easily. By
default, "git checkout" refused to switch branches, to give you a chance
to stash your local changes (or use "-m" to merge, accepting the risks of
getting conflicts).
This safety, however, had one deliberate hole since early June 2005. When
your local change was to remove a path (and optionally to stage that
removal), the command checked out the path from the switched-to commit
nevertheless.
This was to allow an initial checkout to happen smoothly (e.g. an initial
checkout is done by starting with an empty index and switching from the
commit at the HEAD to the same commit). We can tighten the rule slightly
to allow this special case to pass, without losing sight of removal
explicitly done by the user, by noticing if the index is truly empty when
the operation begins.
For historical background, see:
http://thread.gmane.org/gmane.comp.version-control.git/4641/focus=4646
This case is marked as *0* in the message, which both Linus and I said "it
feels somewhat wrong but otherwise we cannot start from an empty index".
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-08 06:49:25 +04:00
|
|
|
else if (newtree) {
|
|
|
|
if (oldtree && !o->initial_checkout) {
|
|
|
|
/*
|
|
|
|
* deletion of the path was staged;
|
|
|
|
*/
|
|
|
|
if (same(oldtree, newtree))
|
|
|
|
return 1;
|
|
|
|
return reject_merge(oldtree, o);
|
|
|
|
}
|
2006-07-30 22:26:15 +04:00
|
|
|
return merged_entry(newtree, current, o);
|
checkout: do not lose staged removal
The logic to checkout a different commit implements the safety to never
lose user's local changes. For example, switching from a commit to
another commit, when you have changed a path that is different between
them, need to merge your changes to the version from the switched-to
commit, which you may not necessarily be able to resolve easily. By
default, "git checkout" refused to switch branches, to give you a chance
to stash your local changes (or use "-m" to merge, accepting the risks of
getting conflicts).
This safety, however, had one deliberate hole since early June 2005. When
your local change was to remove a path (and optionally to stage that
removal), the command checked out the path from the switched-to commit
nevertheless.
This was to allow an initial checkout to happen smoothly (e.g. an initial
checkout is done by starting with an empty index and switching from the
commit at the HEAD to the same commit). We can tighten the rule slightly
to allow this special case to pass, without losing sight of removal
explicitly done by the user, by noticing if the index is truly empty when
the operation begins.
For historical background, see:
http://thread.gmane.org/gmane.comp.version-control.git/4641/focus=4646
This case is marked as *0* in the message, which both Linus and I said "it
feels somewhat wrong but otherwise we cannot start from an empty index".
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-08 06:49:25 +04:00
|
|
|
}
|
2007-08-10 23:31:20 +04:00
|
|
|
return deleted_entry(oldtree, current, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Bind merge.
|
|
|
|
*
|
|
|
|
* Keep the index entries at stage0, collapse stage1 but make sure
|
|
|
|
* stage0 does not have anything there.
|
|
|
|
*/
|
2013-06-02 19:46:56 +04:00
|
|
|
int bind_merge(const struct cache_entry * const *src,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2013-06-02 19:46:55 +04:00
|
|
|
const struct cache_entry *old = src[0];
|
|
|
|
const struct cache_entry *a = src[1];
|
2006-07-30 22:26:15 +04:00
|
|
|
|
|
|
|
if (o->merge_size != 1)
|
2012-04-30 04:28:45 +04:00
|
|
|
return error("Cannot do a bind merge of %d trees",
|
2006-07-30 22:26:15 +04:00
|
|
|
o->merge_size);
|
|
|
|
if (a && old)
|
2019-03-22 12:31:36 +03:00
|
|
|
return o->quiet ? -1 :
|
unpack-trees: support super-prefix option
In the future we want to support working tree operations within submodules,
e.g. "git checkout --recurse-submodules", which will update the submodule
to the commit as recorded in its superproject. In the submodule the
unpack-tree operation is carried out as usual, but the reporting to the
user needs to prefix any path with the superproject. The mechanism for
this is the super-prefix. (see 74866d757, git: make super-prefix option)
Add support for the super-prefix option for commands that unpack trees
by wrapping any path output in unpacking trees in the newly introduced
super_prefixed function. This new function prefixes any path with the
super-prefix if there is one. Assuming the submodule case doesn't happen
in the majority of the cases, we'd want to have a fast behavior for no
super prefix, i.e. no reallocation/copying, but just returning path.
Another aspect of introducing the `super_prefixed` function is to consider
who owns the memory and if this is the right place where the path gets
modified. As the super prefix ought to change the output behavior only and
not the actual unpack tree part, it is fine to be that late in the line.
As we get passed in 'const char *path', we cannot change the path itself,
which means in case of a super prefix we have to copy over the path.
We need two static buffers in that function as the error messages
contain at most two paths.
For testing purposes enable it in read-tree, which has no output
of paths other than an unpack-trees.c. These are all converted in
this patch.
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-18 04:05:20 +03:00
|
|
|
error(ERRORMSG(o, ERROR_BIND_OVERLAP),
|
|
|
|
super_prefixed(a->name),
|
|
|
|
super_prefixed(old->name));
|
2006-07-30 22:26:15 +04:00
|
|
|
if (!a)
|
2007-04-02 11:06:12 +04:00
|
|
|
return keep_entry(old, o);
|
2006-07-30 22:26:15 +04:00
|
|
|
else
|
|
|
|
return merged_entry(a, NULL, o);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* One-way merge.
|
|
|
|
*
|
|
|
|
* The rule is:
|
|
|
|
* - take the stat information from stage0, take the data from stage1
|
|
|
|
*/
|
2013-06-02 19:46:56 +04:00
|
|
|
int oneway_merge(const struct cache_entry * const *src,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-30 22:26:15 +04:00
|
|
|
{
|
2013-06-02 19:46:55 +04:00
|
|
|
const struct cache_entry *old = src[0];
|
|
|
|
const struct cache_entry *a = src[1];
|
2006-07-30 22:26:15 +04:00
|
|
|
|
|
|
|
if (o->merge_size != 1)
|
|
|
|
return error("Cannot do a oneway merge of %d trees",
|
|
|
|
o->merge_size);
|
|
|
|
|
checkout -f: deal with a D/F conflict entry correctly
When we switch branches with "checkout -f", unpack_trees() feeds two
cache_entries to oneway_merge() function in its src[] array argument. The
zeroth entry comes from the current index, and the first entry represents
what the merge result should be, taken from the tree recorded in the
commit we are switching to.
When we have a blob (either regular file or a symlink) in the index and in
the work tree at path "foo", and the switched-to tree has "foo/bar",
i.e. "foo" becomes a directory, src[0] is obviously that blob currently
registered at "foo". Even though we do not have anything at "foo" in the
switched-to tree, src[1] is _not_ NULL in this case.
The unpack_trees() machinery places a special marker df_conflict_entry
to signal that no blob exists at "foo", but it will become a directory
that may have somthing underneath it (namely "foo/bar"), so a usual 3-way
merge can notice the situation.
But oneway_merge() codepath failed to notice this and passed the special
marker directly to merged_entry(). This happens to remove the "foo" in
the end because the df_conflict_entry does not have any name (hence the
"error" message) and its addition in add_index_entry() is rejected, but it
is wrong.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-07-18 23:26:38 +04:00
|
|
|
if (!a || a == o->df_conflict_entry)
|
2006-07-30 22:26:15 +04:00
|
|
|
return deleted_entry(old, old, o);
|
2008-03-07 05:12:28 +03:00
|
|
|
|
2006-07-30 22:26:15 +04:00
|
|
|
if (old && same(old, a)) {
|
2008-03-07 05:12:28 +03:00
|
|
|
int update = 0;
|
2019-11-20 11:32:17 +03:00
|
|
|
if (o->reset && o->update && !ce_uptodate(old) && !ce_skip_worktree(old) &&
|
|
|
|
!(old->ce_flags & CE_FSMONITOR_VALID)) {
|
2006-07-30 22:26:15 +04:00
|
|
|
struct stat st;
|
|
|
|
if (lstat(old->name, &st) ||
|
2009-12-14 14:43:58 +03:00
|
|
|
ie_match_stat(o->src_index, old, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE))
|
2008-03-07 05:12:28 +03:00
|
|
|
update |= CE_UPDATE;
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
2018-01-05 23:03:03 +03:00
|
|
|
if (o->update && S_ISGITLINK(old->ce_mode) &&
|
|
|
|
should_update_submodules() && !verify_uptodate(old, o))
|
|
|
|
update |= CE_UPDATE;
|
2019-03-18 14:38:22 +03:00
|
|
|
add_entry(o, old, update, CE_STAGEMASK);
|
2008-03-07 05:12:28 +03:00
|
|
|
return 0;
|
2006-07-30 22:26:15 +04:00
|
|
|
}
|
|
|
|
return merged_entry(a, old, o);
|
|
|
|
}
|
2021-03-03 14:16:42 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Merge worktree and untracked entries in a stash entry.
|
|
|
|
*
|
|
|
|
* Ignore all index entries. Collapse remaining trees but make sure that they
|
|
|
|
* don't have any conflicting files.
|
|
|
|
*/
|
|
|
|
int stash_worktree_untracked_merge(const struct cache_entry * const *src,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
const struct cache_entry *worktree = src[1];
|
|
|
|
const struct cache_entry *untracked = src[2];
|
|
|
|
|
|
|
|
if (o->merge_size != 2)
|
|
|
|
BUG("invalid merge_size: %d", o->merge_size);
|
|
|
|
|
|
|
|
if (worktree && untracked)
|
|
|
|
return error(_("worktree and untracked commit have duplicate entries: %s"),
|
|
|
|
super_prefixed(worktree->name));
|
|
|
|
|
|
|
|
return merged_entry(worktree ? worktree : untracked, NULL, o);
|
|
|
|
}
|