Merge branch 'lt/case-insensitive'

* lt/case-insensitive: Make git-add behave more sensibly in a case-insensitive environment When adding files to the index, add support for case-independent matches Make unpack-tree update removed files before any updated files Make branch merging aware of underlying case-insensitive filsystems Add 'core.ignorecase' option Make hash_name_lookup able to do case-independent lookups Make "index_name_exists()" return the cache_entry it found Move name hashing functions into a file of its own Make unpack_trees_options bit flags actual bitfields
2008-05-10 18:14:28 -07:00 · 2008-05-10 18:14:28 -07:00 · 380a742679
--- a/1
+++ b/1
@ -423,6 +423,7 @@ LIB_OBJS += log-tree.o
 LIB_OBJS += mailmap.o
 LIB_OBJS += match-trees.o
 LIB_OBJS += merge-file.o
+LIB_OBJS += name-hash.o
 LIB_OBJS += object.o
 LIB_OBJS += pack-check.o
 LIB_OBJS += pack-revindex.o
--- a/builtin-read-tree.c
+++ b/builtin-read-tree.c
@ -40,7 +40,7 @@ static int read_cache_unmerged(void)
 	for (i = 0; i < active_nr; i++) {
 		struct cache_entry *ce = active_cache[i];
 		if (ce_stage(ce)) {
-			remove_index_entry(ce);
+			remove_name_hash(ce);
 			if (last && !strcmp(ce->name, last->name))
 				continue;
 			cache_tree_invalidate_path(active_cache_tree, ce->name);
--- a/cache.h
+++ b/cache.h
@ -133,6 +133,7 @@ struct cache_entry {
 #define CE_UPDATE    (0x10000)
 #define CE_REMOVE    (0x20000)
 #define CE_UPTODATE  (0x40000)
+#define CE_ADDED     (0x80000)

 #define CE_HASHED    (0x100000)
 #define CE_UNHASHED  (0x200000)
@ -153,20 +154,6 @@ static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry
 	dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
 }

-/*
- * We don't actually *remove* it, we can just mark it invalid so that
- * we won't find it in lookups.
- *
- * Not only would we have to search the lists (simple enough), but
- * we'd also have to rehash other hash buckets in case this makes the
- * hash bucket empty (common). So it's much better to just mark
- * it.
- */
-static inline void remove_index_entry(struct cache_entry *ce)
-{
-	ce->ce_flags |= CE_UNHASHED;
-}
-
 static inline unsigned create_ce_flags(size_t len, unsigned stage)
 {
 	if (len >= CE_NAMEMASK)
@ -241,6 +228,23 @@ struct index_state {

 extern struct index_state the_index;

+/* Name hashing */
+extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
+/*
+ * We don't actually *remove* it, we can just mark it invalid so that
+ * we won't find it in lookups.
+ *
+ * Not only would we have to search the lists (simple enough), but
+ * we'd also have to rehash other hash buckets in case this makes the
+ * hash bucket empty (common). So it's much better to just mark
+ * it.
+ */
+static inline void remove_name_hash(struct cache_entry *ce)
+{
+	ce->ce_flags |= CE_UNHASHED;
+}
+
+
 #ifndef NO_THE_INDEX_COMPATIBILITY_MACROS
 #define active_cache (the_index.cache)
 #define active_nr (the_index.cache_nr)
@ -261,7 +265,7 @@ extern struct index_state the_index;
 #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL)
 #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
 #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
-#define cache_name_exists(name, namelen) index_name_exists(&the_index, (name), (namelen))
+#define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase))
 #endif

 enum object_type {
@ -351,7 +355,7 @@ extern int write_index(const struct index_state *, int newfd);
 extern int discard_index(struct index_state *);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
-extern int index_name_exists(struct index_state *istate, const char *name, int namelen);
+extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
 extern int index_name_pos(const struct index_state *, const char *name, int namelen);
 #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
 #define ADD_CACHE_OK_TO_REPLACE 2	/* Ok to replace file/directory */
@ -405,6 +409,7 @@ extern int delete_ref(const char *, const unsigned char *sha1);
 extern int trust_executable_bit;
 extern int quote_path_fully;
 extern int has_symlinks;
+extern int ignore_case;
 extern int assume_unchanged;
 extern int prefer_symlink_refs;
 extern int log_all_ref_updates;
--- a/config.c
+++ b/config.c
@ -350,6 +350,11 @@ int git_default_config(const char *var, const char *value)
 		return 0;
 	}

+	if (!strcmp(var, "core.ignorecase")) {
+		ignore_case = git_config_bool(var, value);
+		return 0;
+	}
+
 	if (!strcmp(var, "core.bare")) {
 		is_bare_repository_cfg = git_config_bool(var, value);
 		return 0;
--- a/dir.c
+++ b/dir.c
@ -389,7 +389,7 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len)

 struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
 {
-	if (cache_name_exists(pathname, len))
+	if (cache_name_exists(pathname, len, ignore_case))
 		return NULL;

 	ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
--- a/environment.c
+++ b/environment.c
@ -14,6 +14,7 @@ char git_default_name[MAX_GITNAME];
 int trust_executable_bit = 1;
 int quote_path_fully = 1;
 int has_symlinks = 1;
+int ignore_case;
 int assume_unchanged;
 int prefer_symlink_refs;
 int is_bare_repository_cfg = -1; /* unspecified */
--- a/name-hash.c
+++ b/name-hash.c
@ -0,0 +1,119 @@
+/*
+ * name-hash.c
+ *
+ * Hashing names in the index state
+ *
+ * Copyright (C) 2008 Linus Torvalds
+ */
+#define NO_THE_INDEX_COMPATIBILITY_MACROS
+#include "cache.h"
+
+/*
+ * This removes bit 5 if bit 6 is set.
+ *
+ * That will make US-ASCII characters hash to their upper-case
+ * equivalent. We could easily do this one whole word at a time,
+ * but that's for future worries.
+ */
+static inline unsigned char icase_hash(unsigned char c)
+{
+	return c & ~((c & 0x40) >> 1);
+}
+
+static unsigned int hash_name(const char *name, int namelen)
+{
+	unsigned int hash = 0x123;
+
+	do {
+		unsigned char c = *name++;
+		c = icase_hash(c);
+		hash = hash*101 + c;
+	} while (--namelen);
+	return hash;
+}
+
+static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
+{
+	void **pos;
+	unsigned int hash;
+
+	if (ce->ce_flags & CE_HASHED)
+		return;
+	ce->ce_flags |= CE_HASHED;
+	ce->next = NULL;
+	hash = hash_name(ce->name, ce_namelen(ce));
+	pos = insert_hash(hash, ce, &istate->name_hash);
+	if (pos) {
+		ce->next = *pos;
+		*pos = ce;
+	}
+}
+
+static void lazy_init_name_hash(struct index_state *istate)
+{
+	int nr;
+
+	if (istate->name_hash_initialized)
+		return;
+	for (nr = 0; nr < istate->cache_nr; nr++)
+		hash_index_entry(istate, istate->cache[nr]);
+	istate->name_hash_initialized = 1;
+}
+
+void add_name_hash(struct index_state *istate, struct cache_entry *ce)
+{
+	ce->ce_flags &= ~CE_UNHASHED;
+	if (istate->name_hash_initialized)
+		hash_index_entry(istate, ce);
+}
+
+static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
+{
+	if (len1 != len2)
+		return 0;
+
+	while (len1) {
+		unsigned char c1 = *name1++;
+		unsigned char c2 = *name2++;
+		len1--;
+		if (c1 != c2) {
+			c1 = toupper(c1);
+			c2 = toupper(c2);
+			if (c1 != c2)
+				return 0;
+		}
+	}
+	return 1;
+}
+
+static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase)
+{
+	int len = ce_namelen(ce);
+
+	/*
+	 * Always do exact compare, even if we want a case-ignoring comparison;
+	 * we do the quick exact one first, because it will be the common case.
+	 */
+	if (len == namelen && !cache_name_compare(name, namelen, ce->name, len))
+		return 1;
+
+	return icase && slow_same_name(name, namelen, ce->name, len);
+}
+
+struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
+{
+	unsigned int hash = hash_name(name, namelen);
+	struct cache_entry *ce;
+
+	lazy_init_name_hash(istate);
+	ce = lookup_hash(hash, &istate->name_hash);
+
+	while (ce) {
+		if (!(ce->ce_flags & CE_UNHASHED)) {
+			if (same_name(ce, name, namelen, icase))
+				return ce;
+		}
+		ce = ce->next;
+	}
+	return NULL;
+}
--- a/read-cache.c
+++ b/read-cache.c
@ -23,80 +23,21 @@

 struct index_state the_index;

-static unsigned int hash_name(const char *name, int namelen)
-{
-	unsigned int hash = 0x123;
-
-	do {
-		unsigned char c = *name++;
-		hash = hash*101 + c;
-	} while (--namelen);
-	return hash;
-}
-
-static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
-{
-	void **pos;
-	unsigned int hash;
-
-	if (ce->ce_flags & CE_HASHED)
-		return;
-	ce->ce_flags |= CE_HASHED;
-	ce->next = NULL;
-	hash = hash_name(ce->name, ce_namelen(ce));
-	pos = insert_hash(hash, ce, &istate->name_hash);
-	if (pos) {
-		ce->next = *pos;
-		*pos = ce;
-	}
-}
-
-static void lazy_init_name_hash(struct index_state *istate)
-{
-	int nr;
-
-	if (istate->name_hash_initialized)
-		return;
-	for (nr = 0; nr < istate->cache_nr; nr++)
-		hash_index_entry(istate, istate->cache[nr]);
-	istate->name_hash_initialized = 1;
-}
-
 static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
 {
-	ce->ce_flags &= ~CE_UNHASHED;
 	istate->cache[nr] = ce;
-	if (istate->name_hash_initialized)
-		hash_index_entry(istate, ce);
+	add_name_hash(istate, ce);
 }

 static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
 {
 	struct cache_entry *old = istate->cache[nr];

-	remove_index_entry(old);
+	remove_name_hash(old);
 	set_index_entry(istate, nr, ce);
 	istate->cache_changed = 1;
 }

-int index_name_exists(struct index_state *istate, const char *name, int namelen)
-{
-	unsigned int hash = hash_name(name, namelen);
-	struct cache_entry *ce;
-
-	lazy_init_name_hash(istate);
-	ce = lookup_hash(hash, &istate->name_hash);
-
-	while (ce) {
-		if (!(ce->ce_flags & CE_UNHASHED)) {
-			if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags))
-				return 1;
-		}
-		ce = ce->next;
-	}
-	return 0;
-}
-
 /*
 * This only updates the "non-critical" parts of the directory
 * cache, ie the parts that aren't tracked by GIT, and only used
@ -438,7 +379,7 @@ int remove_index_entry_at(struct index_state *istate, int pos)
 {
 	struct cache_entry *ce = istate->cache[pos];

-	remove_index_entry(ce);
+	remove_name_hash(ce);
 	istate->cache_changed = 1;
 	istate->cache_nr--;
 	if (pos >= istate->cache_nr)
@ -488,11 +429,43 @@ static int index_name_pos_also_unmerged(struct index_state *istate,
 	return pos;
 }

+static int different_name(struct cache_entry *ce, struct cache_entry *alias)
+{
+	int len = ce_namelen(ce);
+	return ce_namelen(alias) != len || memcmp(ce->name, alias->name, len);
+}
+
+/*
+ * If we add a filename that aliases in the cache, we will use the
+ * name that we already have - but we don't want to update the same
+ * alias twice, because that implies that there were actually two
+ * different files with aliasing names!
+ *
+ * So we use the CE_ADDED flag to verify that the alias was an old
+ * one before we accept it as
+ */
+static struct cache_entry *create_alias_ce(struct cache_entry *ce, struct cache_entry *alias)
+{
+	int len;
+	struct cache_entry *new;
+
+	if (alias->ce_flags & CE_ADDED)
+		die("Will not add file alias '%s' ('%s' already exists in index)", ce->name, alias->name);
+
+	/* Ok, create the new entry using the name of the existing alias */
+	len = ce_namelen(alias);
+	new = xcalloc(1, cache_entry_size(len));
+	memcpy(new->name, alias->name, len);
+	copy_cache_entry(new, ce);
+	free(ce);
+	return new;
+}
+
 int add_file_to_index(struct index_state *istate, const char *path, int verbose)
 {
-	int size, namelen, pos;
+	int size, namelen;
 	struct stat st;
-	struct cache_entry *ce;
+	struct cache_entry *ce, *alias;
 	unsigned ce_option = CE_MATCH_IGNORE_VALID|CE_MATCH_RACY_IS_DIRTY;

 	if (lstat(path, &st))
@ -525,18 +498,19 @@ int add_file_to_index(struct index_state *istate, const char *path, int verbose)
 		ce->ce_mode = ce_mode_from_stat(ent, st.st_mode);
 	}

-	pos = index_name_pos(istate, ce->name, namelen);
-	if (0 <= pos &&
-	    !ce_stage(istate->cache[pos]) &&
-	    !ie_match_stat(istate, istate->cache[pos], &st, ce_option)) {
+	alias = index_name_exists(istate, ce->name, ce_namelen(ce), ignore_case);
+	if (alias && !ce_stage(alias) && !ie_match_stat(istate, alias, &st, ce_option)) {
 		/* Nothing changed, really */
 		free(ce);
-		ce_mark_uptodate(istate->cache[pos]);
+		ce_mark_uptodate(alias);
+		alias->ce_flags |= CE_ADDED;
 		return 0;
 	}
-
 	if (index_path(ce->sha1, path, &st, 1))
 		die("unable to index file %s", path);
+	if (ignore_case && alias && different_name(ce, alias))
+		ce = create_alias_ce(ce, alias);
+	ce->ce_flags |= CE_ADDED;
 	if (add_index_entry(istate, ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE))
 		die("unable to add %s to index",path);
 	if (verbose)
--- a/unpack-trees.c
+++ b/unpack-trees.c
@ -79,16 +79,21 @@ static int check_updates(struct unpack_trees_options *o)
 	for (i = 0; i < index->cache_nr; i++) {
 		struct cache_entry *ce = index->cache[i];

-		if (ce->ce_flags & (CE_UPDATE | CE_REMOVE))
-			display_progress(progress, ++cnt);
 		if (ce->ce_flags & CE_REMOVE) {
+			display_progress(progress, ++cnt);
 			if (o->update)
 				unlink_entry(ce->name, last_symlink);
 			remove_index_entry_at(&o->result, i);
 			i--;
 			continue;
 		}
+	}
+
+	for (i = 0; i < index->cache_nr; i++) {
+		struct cache_entry *ce = index->cache[i];
+
 		if (ce->ce_flags & CE_UPDATE) {
+			display_progress(progress, ++cnt);
 			ce->ce_flags &= ~CE_UPDATE;
 			if (o->update) {
 				errs |= checkout_entry(ce, &state, NULL);
@ -520,6 +525,22 @@ static int verify_clean_subdirectory(struct cache_entry *ce, const char *action,
 	return cnt;
 }

+/*
+ * This gets called when there was no index entry for the tree entry 'dst',
+ * but we found a file in the working tree that 'lstat()' said was fine,
+ * and we're on a case-insensitive filesystem.
+ *
+ * See if we can find a case-insensitive match in the index that also
+ * matches the stat information, and assume it's that other file!
+ */
+static int icase_exists(struct unpack_trees_options *o, struct cache_entry *dst, struct stat *st)
+{
+	struct cache_entry *src;
+
+	src = index_name_exists(o->src_index, dst->name, ce_namelen(dst), 1);
+	return src && !ie_match_stat(o->src_index, src, st, CE_MATCH_IGNORE_VALID);
+}
+
 /*
 * We do not want to remove or overwrite a working tree file that
 * is not tracked, unless it is ignored.
@ -538,6 +559,17 @@ static int verify_absent(struct cache_entry *ce, const char *action,
 	if (!lstat(ce->name, &st)) {
 		int cnt;
 		int dtype = ce_to_dtype(ce);
+		struct cache_entry *result;
+
+		/*
+		 * It may be that the 'lstat()' succeeded even though
+		 * target 'ce' was absent, because there is an old
+		 * entry that is different only in case..
+		 *
+		 * Ignore that lstat() if it matches.
+		 */
+		if (ignore_case && icase_exists(o, ce, &st))
+			return 0;

 		if (o->dir && excluded(o->dir, ce->name, &dtype))
 			/*
@ -581,10 +613,9 @@ static int verify_absent(struct cache_entry *ce, const char *action,
 		 * delete this path, which is in a subdirectory that
 		 * is being replaced with a blob.
 		 */
-		cnt = index_name_pos(&o->result, ce->name, strlen(ce->name));
-		if (0 <= cnt) {
-			struct cache_entry *ce = o->result.cache[cnt];
-			if (ce->ce_flags & CE_REMOVE)
+		result = index_name_exists(&o->result, ce->name, ce_namelen(ce), 0);
+		if (result) {
+			if (result->ce_flags & CE_REMOVE)
 				return 0;
 		}

--- a/unpack-trees.h
+++ b/unpack-trees.h
@ -9,16 +9,16 @@ typedef int (*merge_fn_t)(struct cache_entry **src,
 		struct unpack_trees_options *options);

 struct unpack_trees_options {
-	int reset;
-	int merge;
-	int update;
-	int index_only;
-	int nontrivial_merge;
-	int trivial_merges_only;
-	int verbose_update;
-	int aggressive;
-	int skip_unmerged;
-	int gently;
+	unsigned int reset:1,
+		     merge:1,
+		     update:1,
+		     index_only:1,
+		     nontrivial_merge:1,
+		     trivial_merges_only:1,
+		     verbose_update:1,
+		     aggressive:1,
+		     skip_unmerged:1,
+		     gently:1;
 	const char *prefix;
 	int pos;
 	struct dir_struct *dir;
@ -31,7 +31,7 @@ struct unpack_trees_options {
 	void *unpack_data;

 	struct index_state *dst_index;
-	const struct index_state *src_index;
+	struct index_state *src_index;
 	struct index_state result;
 };