From 1654dd0cf5ee1827322aca156af7d96d757201c7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 6 Nov 2009 21:55:25 -0800 Subject: [PATCH] ceph: make object hash a pg_pool property The object will be hashed to a placement seed (ps) based on the pg_pool's hash function. This allows new hashes to be introduced into an existing object store, or selection of a hash appropriate to the objects that will be stored in a particular pool. Signed-off-by: Sage Weil --- fs/ceph/Makefile | 2 +- fs/ceph/README | 2 + fs/ceph/ceph_fs.c | 77 ----------------------------- fs/ceph/ceph_fs.h | 2 - fs/ceph/ceph_hash.c | 118 ++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/ceph_hash.h | 13 +++++ fs/ceph/osdmap.c | 2 +- fs/ceph/rados.h | 1 + fs/ceph/types.h | 1 + 9 files changed, 137 insertions(+), 81 deletions(-) create mode 100644 fs/ceph/ceph_hash.c create mode 100644 fs/ceph/ceph_hash.h diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 8bad70aac363..bdd3e6fc1609 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile @@ -13,7 +13,7 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ mon_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ debugfs.o \ - ceph_fs.o ceph_strings.o ceph_frag.o + ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o else #Otherwise we were called directly from the command diff --git a/fs/ceph/README b/fs/ceph/README index 660e00074d59..18352fab37c0 100644 --- a/fs/ceph/README +++ b/fs/ceph/README @@ -10,6 +10,8 @@ src/include/rados.h fs/ceph/rados.h src/include/ceph_strings.cc fs/ceph/ceph_strings.c src/include/ceph_frag.h fs/ceph/ceph_frag.h src/include/ceph_frag.cc fs/ceph/ceph_frag.c +src/include/ceph_hash.h fs/ceph/ceph_hash.h +src/include/ceph_hash.cc fs/ceph/ceph_hash.c src/crush/crush.c fs/ceph/crush/crush.c src/crush/crush.h fs/ceph/crush/crush.h src/crush/mapper.c fs/ceph/crush/mapper.c diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index b3ecf1b07521..79d76bc4303f 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c @@ -72,80 +72,3 @@ int ceph_caps_for_mode(int mode) } return 0; } - -/* - * Robert Jenkin's hash function. - * http://burtleburtle.net/bob/hash/evahash.html - * This is in the public domain. - */ -#define mix(a, b, c) \ - do { \ - a = a - b; a = a - c; a = a ^ (c >> 13); \ - b = b - c; b = b - a; b = b ^ (a << 8); \ - c = c - a; c = c - b; c = c ^ (b >> 13); \ - a = a - b; a = a - c; a = a ^ (c >> 12); \ - b = b - c; b = b - a; b = b ^ (a << 16); \ - c = c - a; c = c - b; c = c ^ (b >> 5); \ - a = a - b; a = a - c; a = a ^ (c >> 3); \ - b = b - c; b = b - a; b = b ^ (a << 10); \ - c = c - a; c = c - b; c = c ^ (b >> 15); \ - } while (0) - -unsigned int ceph_full_name_hash(const char *str, unsigned int length) -{ - const unsigned char *k = (const unsigned char *)str; - __u32 a, b, c; /* the internal state */ - __u32 len; /* how many key bytes still need mixing */ - - /* Set up the internal state */ - len = length; - a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ - b = a; - c = 0; /* variable initialization of internal state */ - - /* handle most of the key */ - while (len >= 12) { - a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + - ((__u32)k[3] << 24)); - b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + - ((__u32)k[7] << 24)); - c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + - ((__u32)k[11] << 24)); - mix(a, b, c); - k = k + 12; - len = len - 12; - } - - /* handle the last 11 bytes */ - c = c + length; - switch (len) { /* all the case statements fall through */ - case 11: - c = c + ((__u32)k[10] << 24); - case 10: - c = c + ((__u32)k[9] << 16); - case 9: - c = c + ((__u32)k[8] << 8); - /* the first byte of c is reserved for the length */ - case 8: - b = b + ((__u32)k[7] << 24); - case 7: - b = b + ((__u32)k[6] << 16); - case 6: - b = b + ((__u32)k[5] << 8); - case 5: - b = b + k[4]; - case 4: - a = a + ((__u32)k[3] << 24); - case 3: - a = a + ((__u32)k[2] << 16); - case 2: - a = a + ((__u32)k[1] << 8); - case 1: - a = a + k[0]; - /* case 0: nothing left to add */ - } - mix(a, b, c); - - return c; -} - diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 25fc537f4140..36becb024788 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h @@ -49,8 +49,6 @@ #define CEPH_MAX_MON 31 -unsigned int ceph_full_name_hash(const char *name, unsigned int len); - /* * ceph_file_layout - describe data layout for a file/inode diff --git a/fs/ceph/ceph_hash.c b/fs/ceph/ceph_hash.c new file mode 100644 index 000000000000..ac8be54631fe --- /dev/null +++ b/fs/ceph/ceph_hash.c @@ -0,0 +1,118 @@ + +#include "types.h" + +/* + * Robert Jenkin's hash function. + * http://burtleburtle.net/bob/hash/evahash.html + * This is in the public domain. + */ +#define mix(a, b, c) \ + do { \ + a = a - b; a = a - c; a = a ^ (c >> 13); \ + b = b - c; b = b - a; b = b ^ (a << 8); \ + c = c - a; c = c - b; c = c ^ (b >> 13); \ + a = a - b; a = a - c; a = a ^ (c >> 12); \ + b = b - c; b = b - a; b = b ^ (a << 16); \ + c = c - a; c = c - b; c = c ^ (b >> 5); \ + a = a - b; a = a - c; a = a ^ (c >> 3); \ + b = b - c; b = b - a; b = b ^ (a << 10); \ + c = c - a; c = c - b; c = c ^ (b >> 15); \ + } while (0) + +unsigned ceph_str_hash_rjenkins(const char *str, unsigned length) +{ + const unsigned char *k = (const unsigned char *)str; + __u32 a, b, c; /* the internal state */ + __u32 len; /* how many key bytes still need mixing */ + + /* Set up the internal state */ + len = length; + a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + b = a; + c = 0; /* variable initialization of internal state */ + + /* handle most of the key */ + while (len >= 12) { + a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + + ((__u32)k[3] << 24)); + b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + + ((__u32)k[7] << 24)); + c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + + ((__u32)k[11] << 24)); + mix(a, b, c); + k = k + 12; + len = len - 12; + } + + /* handle the last 11 bytes */ + c = c + length; + switch (len) { /* all the case statements fall through */ + case 11: + c = c + ((__u32)k[10] << 24); + case 10: + c = c + ((__u32)k[9] << 16); + case 9: + c = c + ((__u32)k[8] << 8); + /* the first byte of c is reserved for the length */ + case 8: + b = b + ((__u32)k[7] << 24); + case 7: + b = b + ((__u32)k[6] << 16); + case 6: + b = b + ((__u32)k[5] << 8); + case 5: + b = b + k[4]; + case 4: + a = a + ((__u32)k[3] << 24); + case 3: + a = a + ((__u32)k[2] << 16); + case 2: + a = a + ((__u32)k[1] << 8); + case 1: + a = a + k[0]; + /* case 0: nothing left to add */ + } + mix(a, b, c); + + return c; +} + +/* + * linux dcache hash + */ +unsigned ceph_str_hash_linux(const char *str, unsigned length) +{ + unsigned long hash = 0; + unsigned char c; + + while (length-- > 0) { + c = *str++; + hash = (hash + (c << 4) + (c >> 4)) * 11; + } + return hash; +} + + +unsigned ceph_str_hash(int type, const char *s, unsigned len) +{ + switch (type) { + case CEPH_STR_HASH_LINUX: + return ceph_str_hash_linux(s, len); + case CEPH_STR_HASH_RJENKINS: + return ceph_str_hash_rjenkins(s, len); + default: + return -1; + } +} + +const char *ceph_str_hash_name(int type) +{ + switch (type) { + case CEPH_STR_HASH_LINUX: + return "linux"; + case CEPH_STR_HASH_RJENKINS: + return "rjenkins"; + default: + return "unknown"; + } +} diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h new file mode 100644 index 000000000000..5ac470c433c9 --- /dev/null +++ b/fs/ceph/ceph_hash.h @@ -0,0 +1,13 @@ +#ifndef _FS_CEPH_HASH_H +#define _FS_CEPH_HASH_H + +#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ +#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ + +extern unsigned ceph_str_hash_linux(const char *s, unsigned len); +extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len); + +extern unsigned ceph_str_hash(int type, const char *s, unsigned len); +extern const char *ceph_str_hash_name(int type); + +#endif diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index a025555b70af..68478270ad70 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -809,7 +809,7 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, return -EIO; pool = &osdmap->pg_pool[poolid]; - ps = ceph_full_name_hash(oid, strlen(oid)); + ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); if (preferred >= 0) { ps += preferred; num = le32_to_cpu(pool->v.lpg_num); diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 85bdef78d142..fb23ff9297c9 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -84,6 +84,7 @@ struct ceph_pg_pool { __u8 type; /* CEPH_PG_TYPE_* */ __u8 size; /* number of osds in each pg */ __u8 crush_ruleset; /* crush placement rule */ + __u8 object_hash; /* hash mapping object name to ps */ __le32 pg_num, pgp_num; /* number of pg's */ __le32 lpg_num, lpgp_num; /* number of localized pg's */ __le32 last_change; /* most recent epoch changed */ diff --git a/fs/ceph/types.h b/fs/ceph/types.h index 8a514568cab2..28b35a005ec2 100644 --- a/fs/ceph/types.h +++ b/fs/ceph/types.h @@ -9,6 +9,7 @@ #include "ceph_fs.h" #include "ceph_frag.h" +#include "ceph_hash.h" /* * Identify inodes by both their ino AND snapshot id (a u64).