Discussion:
[PATCH] utils: add hash_str(), eliminate copy-pasted hash functions
Gregory Petrosyan
2011-05-06 12:55:30 UTC
Permalink
While eliminating copy-paste, replace hash function with the proper DJB one,
which is better than x31 [1].

[1] http://www.team5150.com/~andrew/noncryptohashzoo/DJB.html

Signed-off-by: Gregory Petrosyan <***@gmail.com>
---
cache.c | 18 ++++--------------
lib.c | 15 ++-------------
utils.h | 13 +++++++++++++
3 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/cache.c b/cache.c
index b334cc6..b6d843e 100644
--- a/cache.c
+++ b/cache.c
@@ -66,16 +66,6 @@ static int new;

pthread_mutex_t cache_mutex = CMUS_MUTEX_INITIALIZER;

-static unsigned int filename_hash(const char *filename)
-{
- unsigned int hash = 0;
- int i;
-
- for (i = 0; filename[i]; i++)
- hash = (hash << 5) - hash + filename[i];
- return hash;
-}
-
static void add_ti(struct track_info *ti, unsigned int hash)
{
unsigned int pos = hash % HASH_SIZE;
@@ -193,7 +183,7 @@ static void do_cache_remove_ti(struct track_info *ti, unsigned int hash)

void cache_remove_ti(struct track_info *ti)
{
- do_cache_remove_ti(ti, filename_hash(ti->filename));
+ do_cache_remove_ti(ti, hash_str(ti->filename));
}

static int read_cache(void)
@@ -232,7 +222,7 @@ static int read_cache(void)
goto corrupt;

ti = cache_entry_to_ti(e);
- add_ti(ti, filename_hash(ti->filename));
+ add_ti(ti, hash_str(ti->filename));
offset += ALIGN(e->size);
}
munmap(buf, size);
@@ -419,7 +409,7 @@ static struct track_info *ip_get_ti(const char *filename)

struct track_info *cache_get_ti(const char *filename)
{
- unsigned int hash = filename_hash(filename);
+ unsigned int hash = hash_str(filename);
struct track_info *ti;

ti = lookup_cache_entry(filename, hash);
@@ -463,7 +453,7 @@ struct track_info **cache_refresh(int *count)
}
}

- hash = filename_hash(ti->filename);
+ hash = hash_str(ti->filename);
track_info_ref(ti);
do_cache_remove_ti(ti, hash);

diff --git a/lib.c b/lib.c
index 09d96a4..f92451e 100644
--- a/lib.c
+++ b/lib.c
@@ -104,21 +104,10 @@ struct fh_entry {
#define FH_SIZE (1024)
static struct fh_entry *ti_hash[FH_SIZE] = { NULL, };

-/* this is from glib */
-static unsigned int str_hash(const char *str)
-{
- unsigned int hash = 0;
- int i;
-
- for (i = 0; str[i]; i++)
- hash = (hash << 5) - hash + str[i];
- return hash;
-}
-
static int hash_insert(struct track_info *ti)
{
const char *filename = ti->filename;
- unsigned int pos = str_hash(filename) % FH_SIZE;
+ unsigned int pos = hash_str(filename) % FH_SIZE;
struct fh_entry **entryp;
struct fh_entry *e;

@@ -143,7 +132,7 @@ static int hash_insert(struct track_info *ti)
static void hash_remove(struct track_info *ti)
{
const char *filename = ti->filename;
- unsigned int pos = str_hash(filename) % FH_SIZE;
+ unsigned int pos = hash_str(filename) % FH_SIZE;
struct fh_entry **entryp;

entryp = &ti_hash[pos];
diff --git a/utils.h b/utils.h
index cd95426..f625462 100644
--- a/utils.h
+++ b/utils.h
@@ -94,6 +94,19 @@ static inline int strcmp0(const char *str1, const char *str2)
return strcmp(str1, str2);
}

+static inline uint32_t hash_str(const char *s)
+{
+ const unsigned char *p = (const unsigned char *)s;
+ uint32_t h = 5381;
+
+ while (*p) {
+ h *= 33;
+ h ^= *p++;
+ }
+
+ return h ^ (h >> 16);
+}
+
static inline time_t file_get_mtime(const char *filename)
{
struct stat s;
--
1.7.4.1
Johannes Weißl
2011-05-06 16:01:29 UTC
Permalink
Post by Gregory Petrosyan
While eliminating copy-paste, replace hash function with the proper DJB one,
which is better than x31 [1].
[1] http://www.team5150.com/~andrew/noncryptohashzoo/DJB.html
Great! Are you using tools to eliminate copy-paste? I guess cmus code
contains many redundant / duplicate parts... e.g. the u_strcoll
functions are not used anymore...

Johannes
Gregory Petrosyan
2011-05-06 16:09:58 UTC
Permalink
Post by Johannes Weißl
Post by Gregory Petrosyan
While eliminating copy-paste, replace hash function with the proper DJB one,
which is better than x31 [1].
[1] http://www.team5150.com/~andrew/noncryptohashzoo/DJB.html
Great! Are you using tools to eliminate copy-paste? I guess cmus code
contains many redundant / duplicate parts... e.g. the u_strcoll
functions are not used anymore...
No, I was just looking at cache.c's hash (since I love hash functions
:-), and when replacing it, did a "git grep hash".

                Gregory

Loading...