socsvn commit: r236814 - soc2012/gpf/misc
gpf at FreeBSD.org
gpf at FreeBSD.org
Thu May 31 14:48:02 UTC 2012
Author: gpf
Date: Thu May 31 14:47:59 2012
New Revision: 236814
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=236814
Log:
Altered gleb's script to test how often cuckoo_insert fails (infinfinite loop)
and the relationship between success rate and hash table size.
Added:
soc2012/gpf/misc/ck.py
Added: soc2012/gpf/misc/ck.py
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ soc2012/gpf/misc/ck.py Thu May 31 14:47:59 2012 (r236814)
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# gpf: testing how often cuckoo_insert goes into infinite loop
+# comments:
+# This implementation uses 2 hash tables and 2 respective hash functions.
+# The size of a hash table is the next mprime number of the total amount of elements
+# So for 100 elements we will have 2 * 101 = 202 cells.
+# observations:
+# This way, success rate is around ~85%.
+# If we increate the size of the tables by 10%, success rate goes to ~96%
+# In this case, success rates increase as the number of total elements n increase.
+
+import random
+import gmpy
+from gmpy import mpz
+
+def hash1(elem, hts):
+ pos = elem % hts
+ return pos
+
+def hash2(elem, hts):
+ pos = (elem / hts) % hts
+ return pos
+
+def cuckoo_insert(elem, ht1, ht2, hts):
+ max_tries = hts
+
+ for i in xrange(0, max_tries):
+ pos1 = hash1(elem, hts)
+ elem1 = ht1[pos1]
+ # do the cuckoo
+ ht1[pos1] = elem
+ if elem1 == 0:
+ return 0
+ pos2 = hash2(elem1, hts)
+ elem2 = ht2[pos2]
+ # do the cuckoo
+ ht2[pos2] = elem1
+ if elem2 == 0:
+ return 0
+ else:
+ elem = elem2
+ return 1
+
+def build_cuckoo_table(R, hts):
+ ht1 = [ 0 for x in xrange(0, hts) ]
+ ht2 = [ 0 for x in xrange(0, hts) ]
+
+ for i in xrange(0, len(R)):
+ r = R[i]
+ inf = cuckoo_insert(r, ht1, ht2, hts)
+ if inf == 1:
+ return 1
+
+ return 0
+
+def generate_ht(n, tries, extra_space):
+ nextra = int(n * extra_space / 100)
+ hts = int(gmpy.next_prime(mpz(n + nextra)))
+
+ print 'Items: %s' % (n,)
+ print 'Hash Table Sizes: ', hts
+ print 'Extra Space for each table: %.02f%%' % extra_space
+
+ successful = 0
+ for j in xrange(0, tries):
+ R = [ random.getrandbits(64) for i in xrange(0, n) ]
+ while len(R) != len(set(R)):
+ print 'Initial hash collisions, regenerating: %s/%s', len(set(R)), n
+ R = [ random.getrandbits(64) for i in xrange(0, n) ]
+
+ res = build_cuckoo_table(R, hts)
+ if res == 0:
+ successful += 1
+ #done = float(100) * j / tries
+ #if (done % 25) == 0:
+ #print 'Done: %.02f%%' % (done)
+
+ succ_rate = float(100) * successful / tries
+ print 'Tries: ', tries
+ print 'Successful %d, Success rate %.02f%%' % (successful, succ_rate)
+ print ''
+
+# XXXgpf: better to lower the 2nd argument by 1/10 if you want results in a couple of mins
+generate_ht(100, 10000, 0)
+generate_ht(1000, 10000, 0)
+generate_ht(10000, 10000, 0)
+generate_ht(100000, 1000, 0)
+
+generate_ht(100, 10000, 10)
+generate_ht(1000, 10000, 10)
+generate_ht(10000, 10000, 10)
+generate_ht(100000, 1000, 10)
More information about the svn-soc-all
mailing list