diff options
author | Ori Bernstein <ori@eigenstate.org> | 2015-08-26 00:19:40 -0700 |
---|---|---|
committer | Ori Bernstein <ori@eigenstate.org> | 2015-08-26 00:19:40 -0700 |
commit | 8b110be262c4839c498c09b2bd803f24a8b8e9b7 (patch) | |
tree | 90fae3aa7dd33743ac726a95b40c7abfc4a7d96c | |
parent | fc888548ad40d62ba16abc7ab5e38b993bd4e59a (diff) | |
download | mc-8b110be262c4839c498c09b2bd803f24a8b8e9b7.tar.gz |
Test and fix hash tables.
-rw-r--r-- | libstd/htab.myr | 36 | ||||
-rw-r--r-- | libstd/test/htab.myr | 104 | ||||
-rw-r--r-- | mk/c.mk | 2 | ||||
-rw-r--r-- | parse/htab.c | 2 |
4 files changed, 125 insertions, 19 deletions
diff --git a/libstd/htab.myr b/libstd/htab.myr index 887d4f6..ac2e7aa 100644 --- a/libstd/htab.myr +++ b/libstd/htab.myr @@ -10,6 +10,7 @@ pkg std = eq : (a : @k, b : @k -> bool) nelt : size + ndead : size keys : @k[:] vals : @v[:] hashes : uint32[:] @@ -28,6 +29,8 @@ pkg std = const Initsz = 32 +extern const put : (fmt : byte[:], args : ... -> size) + generic hash = {ht, k var h @@ -39,25 +42,24 @@ generic hash = {ht, k ;; } -generic resize = {ht +generic resize = {ht, sz var oldk var oldv var oldh var oldd - var sz var i oldk = ht.keys oldv = ht.vals oldh = ht.hashes oldd = ht.dead - sz = 2*max(ht.keys.len, 1) ht.keys = slalloc(sz) ht.vals = slalloc(sz) ht.hashes = slzalloc(sz) ht.dead = slzalloc(sz) ht.nelt = 0 + ht.ndead = 0 for i = 0; i < oldk.len; i++ if oldh[i] != 0 && !oldd[i] htput(ht, oldk[i], oldv[i]) @@ -82,15 +84,14 @@ generic idx = {ht, k i = (h + di) & (ht.keys.len - 1) ;; - if ht.hashes[i] == 0 || ht.dead[i] + if ht.hashes[i] == 0 -> `None ;; - if ht.eq(ht.keys[i], k) + if ht.hashes[i] == h && !ht.dead[i] && ht.eq(ht.keys[i], k) break - else - di++ - i = (h + di) & (ht.keys.len - 1) ;; + di++ + i = (h + di) & (ht.keys.len - 1) ;; -> `Some i } @@ -104,6 +105,7 @@ generic mkht = {h, eq ht.eq = eq ht.nelt = 0 + ht.ndead = 0 ht.keys = slalloc(Initsz) ht.vals = slalloc(Initsz) ht.hashes = slzalloc(Initsz) @@ -129,14 +131,12 @@ generic htput = {ht, k, v i = h & (ht.keys.len - 1) neltincr = 1 while ht.hashes[i] != 0 && !ht.dead[i] - /* - second insertion just overwrites first. - nb: comparing keys for dead values is bad. - */ - if ht.hashes[i] == h + /* second insertion overwrites */ + if ht.hashes[i] == h && !ht.dead[i] + /* dead key, we can just insert here */ if ht.dead[i] break - /* replacing a key shouldn't grow the element count */ + /* replacing a key */ elif ht.eq(ht.keys[i], k) neltincr = 0 break @@ -151,7 +151,7 @@ generic htput = {ht, k, v ht.vals[i] = v ht.dead[i] = false if ht.keys.len < ht.nelt * 2 - resize(ht) + resize(ht, 2*ht.keys.len) ;; } @@ -160,9 +160,11 @@ generic htdel = {ht, k | `Some i: ht.dead[i] = true ht.nelt-- + ht.ndead++ + std.put("ndead = {}\n", ht.ndead) /* remove tombstones if we shrink enough */ - if ht.keys.len > ht.nelt * 4 - resize(ht) + if ht.keys.len < ht.ndead * 4 + resize(ht, ht.keys.len) ;; | _: /* do nothing */ diff --git a/libstd/test/htab.myr b/libstd/test/htab.myr new file mode 100644 index 0000000..1801d76 --- /dev/null +++ b/libstd/test/htab.myr @@ -0,0 +1,104 @@ +use std + +const insertion = { + /* + var ht + var i + + ht = std.mkht(idhash, ideq) + /* only a few values; shouldn't trigger growth */ + for i = 0; i < 5; i++ + std.htput(ht, i, i) + ;; + for i = 0; i < 5; i++ + std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table") + ;; + + /* and grow */ + for i = 0; i < 5000; i++ + std.htput(ht, i, i) + ;; + for i = 0; i < 5000; i++ + std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table") + ;; + */ +} + +const deletion = { + var ht + var i + + ht = std.mkht(idhash, ideq) + /* create a hash table with a few hundred values */ + for i = 0; i < 4000; i++ + std.htput(ht, i, i) + ;; + for i = 0; i < 200; i++ + std.htdel(ht, i*2) + ;; + for i = 0; i < 200; i++ + std.assert(!std.hthas(ht, i*2), "deleted item still present") + ;; + for i = 0; i < 200; i++ + std.assert(std.hthas(ht, i*2+1), "undeleted item missing") + ;; + for i = 400; i < 4000; i++ + std.assert(std.hthas(ht, i), "undeleted item missing") + ;; + +} + +const collision = { + var ht + var i + + ht = std.mkht(idhash, ideq) + /* insert an element a few hundred times */ + for i = 0; i < 500; i++ + std.htput(ht, 0, i) + ;; + std.assert(std.hthas(ht, 0), "inserted element not present") + std.assert(std.htgetv(ht, 0, -1) == 499, "inserted element has wrong value") + std.htdel(ht, 0) + std.assert(!std.hthas(ht, 0), "element left in table") +} + +const tombstonefill = { + var ht + var i + + ht = std.mkht(idhash, ideq) + /* + insert an element into each slot in the hash table, and + delete it. With direct hashing, this is guaranteed to have + put a tombstone into each slot. + */ + for i = 0; i <= ht.keys.len; i++ + std.htput(ht, i, i) + std.htdel(ht, i) + ;; + /* make sure we haven't actually got anything in the table */ + std.assert(ht.nelt == 0, "elements left in hash table") + std.assert(!std.hthas(ht, 1), "found phantom element") +} + +const main = { + /* only a few elements */ + std.put("insertion\n") + insertion() + std.put("deletion\n") + deletion() + std.put("collision\n") + collision() + + /* what happens if we try to fill everything up with tombstones? */ + tombstonefill() +} + +const idhash = {x + -> x +} + +const ideq = {a, b + -> a == b +} @@ -6,7 +6,7 @@ _LIBSRCHPATHS=$(addprefix -L, $(dir $(DEPS))) _LIBINCPATHS=$(addprefix -I, $(dir $(DEPS))) _LIBPATHS=$(addprefix -l, $(patsubst lib%.a,%,$(notdir $(DEPS)))) -CFLAGS += -O0 -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g +CFLAGS += -O -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g CFLAGS += -MMD -MP -MF ${_DEPSDIR}/$(subst /,-,$*).d LIB ?= $(INSTLIB) diff --git a/parse/htab.c b/parse/htab.c index 8f5cb51..1bdad3a 100644 --- a/parse/htab.c +++ b/parse/htab.c @@ -150,7 +150,7 @@ searchmore: } if (!ht->hashes[i]) return -1; - if (!ht->cmp(ht->keys[i], k) || (ht->hashes[i] == h && ht->dead[i])) + if ((ht->hashes[i] == h && ht->dead[i]) || !ht->cmp(ht->keys[i], k)) goto searchmore; /* collision */ return i; } |