summaryrefslogtreecommitdiff
path: root/libstd
diff options
context:
space:
mode:
authorOri Bernstein <ori@eigenstate.org>2015-08-26 00:19:40 -0700
committerOri Bernstein <ori@eigenstate.org>2015-08-26 00:19:40 -0700
commit8b110be262c4839c498c09b2bd803f24a8b8e9b7 (patch)
tree90fae3aa7dd33743ac726a95b40c7abfc4a7d96c /libstd
parentfc888548ad40d62ba16abc7ab5e38b993bd4e59a (diff)
downloadmc-8b110be262c4839c498c09b2bd803f24a8b8e9b7.tar.gz
Test and fix hash tables.
Diffstat (limited to 'libstd')
-rw-r--r--libstd/htab.myr36
-rw-r--r--libstd/test/htab.myr104
2 files changed, 123 insertions, 17 deletions
diff --git a/libstd/htab.myr b/libstd/htab.myr
index 887d4f6..ac2e7aa 100644
--- a/libstd/htab.myr
+++ b/libstd/htab.myr
@@ -10,6 +10,7 @@ pkg std =
eq : (a : @k, b : @k -> bool)
nelt : size
+ ndead : size
keys : @k[:]
vals : @v[:]
hashes : uint32[:]
@@ -28,6 +29,8 @@ pkg std =
const Initsz = 32
+extern const put : (fmt : byte[:], args : ... -> size)
+
generic hash = {ht, k
var h
@@ -39,25 +42,24 @@ generic hash = {ht, k
;;
}
-generic resize = {ht
+generic resize = {ht, sz
var oldk
var oldv
var oldh
var oldd
- var sz
var i
oldk = ht.keys
oldv = ht.vals
oldh = ht.hashes
oldd = ht.dead
- sz = 2*max(ht.keys.len, 1)
ht.keys = slalloc(sz)
ht.vals = slalloc(sz)
ht.hashes = slzalloc(sz)
ht.dead = slzalloc(sz)
ht.nelt = 0
+ ht.ndead = 0
for i = 0; i < oldk.len; i++
if oldh[i] != 0 && !oldd[i]
htput(ht, oldk[i], oldv[i])
@@ -82,15 +84,14 @@ generic idx = {ht, k
i = (h + di) & (ht.keys.len - 1)
;;
- if ht.hashes[i] == 0 || ht.dead[i]
+ if ht.hashes[i] == 0
-> `None
;;
- if ht.eq(ht.keys[i], k)
+ if ht.hashes[i] == h && !ht.dead[i] && ht.eq(ht.keys[i], k)
break
- else
- di++
- i = (h + di) & (ht.keys.len - 1)
;;
+ di++
+ i = (h + di) & (ht.keys.len - 1)
;;
-> `Some i
}
@@ -104,6 +105,7 @@ generic mkht = {h, eq
ht.eq = eq
ht.nelt = 0
+ ht.ndead = 0
ht.keys = slalloc(Initsz)
ht.vals = slalloc(Initsz)
ht.hashes = slzalloc(Initsz)
@@ -129,14 +131,12 @@ generic htput = {ht, k, v
i = h & (ht.keys.len - 1)
neltincr = 1
while ht.hashes[i] != 0 && !ht.dead[i]
- /*
- second insertion just overwrites first.
- nb: comparing keys for dead values is bad.
- */
- if ht.hashes[i] == h
+ /* second insertion overwrites */
+ if ht.hashes[i] == h && !ht.dead[i]
+ /* dead key, we can just insert here */
if ht.dead[i]
break
- /* replacing a key shouldn't grow the element count */
+ /* replacing a key */
elif ht.eq(ht.keys[i], k)
neltincr = 0
break
@@ -151,7 +151,7 @@ generic htput = {ht, k, v
ht.vals[i] = v
ht.dead[i] = false
if ht.keys.len < ht.nelt * 2
- resize(ht)
+ resize(ht, 2*ht.keys.len)
;;
}
@@ -160,9 +160,11 @@ generic htdel = {ht, k
| `Some i:
ht.dead[i] = true
ht.nelt--
+ ht.ndead++
+ std.put("ndead = {}\n", ht.ndead)
/* remove tombstones if we shrink enough */
- if ht.keys.len > ht.nelt * 4
- resize(ht)
+ if ht.keys.len < ht.ndead * 4
+ resize(ht, ht.keys.len)
;;
| _:
/* do nothing */
diff --git a/libstd/test/htab.myr b/libstd/test/htab.myr
new file mode 100644
index 0000000..1801d76
--- /dev/null
+++ b/libstd/test/htab.myr
@@ -0,0 +1,104 @@
+use std
+
+const insertion = {
+ /*
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /* only a few values; shouldn't trigger growth */
+ for i = 0; i < 5; i++
+ std.htput(ht, i, i)
+ ;;
+ for i = 0; i < 5; i++
+ std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table")
+ ;;
+
+ /* and grow */
+ for i = 0; i < 5000; i++
+ std.htput(ht, i, i)
+ ;;
+ for i = 0; i < 5000; i++
+ std.assert(std.htgetv(ht, i, -1) == i, "returned incorrect value from hash table")
+ ;;
+ */
+}
+
+const deletion = {
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /* create a hash table with a few hundred values */
+ for i = 0; i < 4000; i++
+ std.htput(ht, i, i)
+ ;;
+ for i = 0; i < 200; i++
+ std.htdel(ht, i*2)
+ ;;
+ for i = 0; i < 200; i++
+ std.assert(!std.hthas(ht, i*2), "deleted item still present")
+ ;;
+ for i = 0; i < 200; i++
+ std.assert(std.hthas(ht, i*2+1), "undeleted item missing")
+ ;;
+ for i = 400; i < 4000; i++
+ std.assert(std.hthas(ht, i), "undeleted item missing")
+ ;;
+
+}
+
+const collision = {
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /* insert an element a few hundred times */
+ for i = 0; i < 500; i++
+ std.htput(ht, 0, i)
+ ;;
+ std.assert(std.hthas(ht, 0), "inserted element not present")
+ std.assert(std.htgetv(ht, 0, -1) == 499, "inserted element has wrong value")
+ std.htdel(ht, 0)
+ std.assert(!std.hthas(ht, 0), "element left in table")
+}
+
+const tombstonefill = {
+ var ht
+ var i
+
+ ht = std.mkht(idhash, ideq)
+ /*
+ insert an element into each slot in the hash table, and
+ delete it. With direct hashing, this is guaranteed to have
+ put a tombstone into each slot.
+ */
+ for i = 0; i <= ht.keys.len; i++
+ std.htput(ht, i, i)
+ std.htdel(ht, i)
+ ;;
+ /* make sure we haven't actually got anything in the table */
+ std.assert(ht.nelt == 0, "elements left in hash table")
+ std.assert(!std.hthas(ht, 1), "found phantom element")
+}
+
+const main = {
+ /* only a few elements */
+ std.put("insertion\n")
+ insertion()
+ std.put("deletion\n")
+ deletion()
+ std.put("collision\n")
+ collision()
+
+ /* what happens if we try to fill everything up with tombstones? */
+ tombstonefill()
+}
+
+const idhash = {x
+ -> x
+}
+
+const ideq = {a, b
+ -> a == b
+}