socsvn commit: r237057 - soc2012/gpf/pefs_kmod/sbin/pefs

gpf at FreeBSD.org gpf at FreeBSD.org
Mon Jun 4 12:25:46 UTC 2012


Author: gpf
Date: Mon Jun  4 12:25:43 2012
New Revision: 237057
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=237057

Log:
  rewrite pefs_next_prime
  

Modified:
  soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c

Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Mon Jun  4 11:51:17 2012	(r237056)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Mon Jun  4 12:25:43 2012	(r237057)
@@ -53,7 +53,7 @@
 
 #include "pefs_ctl.h"
 
-#define PEFS_INTEGRITY_DEBUG
+//#define PEFS_INTEGRITY_DEBUG
 #if defined (PEFS_INTEGRITY_DEBUG)
 #define dprintf(a)		printf a
 #else
@@ -62,7 +62,7 @@
 
 #define PEFS_CHECKSUM_FILE_VERSION 0xDD
 #define PEFS_HASH_BYTE_ALIGNMENT 512
-#define PEFS_EXTRA_TABLE_SIZE 0.15
+#define PEFS_EXTRA_TABLE_SIZE 15
 
 TAILQ_HEAD(checksum_head, checksum);
 
@@ -112,14 +112,15 @@
 static int
 pefs_is_prime(uint32_t num)
 {
-	int i;
+	uint32_t i, sq;
 
-	if (num == 0)
-		return 1;
+	if ((num % 2 == 0) || (num % 3 == 0))
+		return 0;
 
-	/* XXXgpf: [TODO] Take a look at arithmetics, comparisons between signed/unsigned etc */
-	for (i = 2; i <= sqrt(num); i++)  {
-		if (num % i == 0)
+	sq = sqrt(num);
+	/* All other primes are in the form of 6k+-1 */
+	for (i = 5; i <= sq; i+=6)  {
+		if ((num % i == 0) || (num % (i+2) == 0))
 			return 0;
 	}
 
@@ -131,10 +132,15 @@
 {
 	uint32_t i;
 
-	for (i = num;; i++)  {
+	if (num % 2 == 0)
+		num+=1;
+
+	for (i = num;; i+=2) {
 		if (pefs_is_prime(i))
 			return i;
 	}
+
+	return 0;
 }
 
 static int
@@ -262,11 +268,11 @@
 {
 	uint32_t i;
 
-	/* 
-	 * spending 15% more space for each table lowers the chance to fall into an 
+	/*
+	 * spending 15% more space for each table lowers the chance to fall into an
 	 * infinite loop during cuckoo insertion to about 1.5%.
 	 */
-	chtp->size = pefs_next_prime(nelements + (nelements * PEFS_EXTRA_TABLE_SIZE));
+	chtp->size = pefs_next_prime(nelements + ((nelements * PEFS_EXTRA_TABLE_SIZE)/100));
 	chtp->nelements = nelements;
 	if (chtp->size < chtp->nelements) {
 		pefs_warn("numeric overflow while computing hash table size");
@@ -348,6 +354,15 @@
 	return (nbucket);
 }
 
+/*
+ * XXXgpf: [TODO] change hash2
+ * gleb says:
+ * http://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
+ * Yes, I've mentioned it because it's available under
+ * /usr/include/sys/fnv_hash.h
+ * murmur3 has good performance/collisions characteristics.
+ */
+
 static uint32_t
 pefs_hash2(struct cuckoo_hash_table *chtp, struct file_header *fhp)
 {
@@ -584,7 +599,7 @@
  * 			file should be regular file
  * 		B2) the file_id is retrieved.
  * 		B3) list of checksums is computed for the file's 4k blocks.
- * 		B4) file entry is added to hash table. (separate chaining is used)
+ * 		B4) file entry is added to hash table. (cuckoo hashing is used)
  */
 static int
 pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len,
@@ -757,22 +772,22 @@
  * Writes are always in little endian byte order.
  *
  * First 16 bytes of .pefs.checksum are filled with .pefs.checksum's file header.
- * Right after this header lies the 'index' part of our database. This index is later 
+ * Right after this header lies the 'index' part of our database. This index is later
  * kept in kernel memory.
  *
- * Index: 
- * Both hash tables of cuckoo algorithm are written to the file sequentially. The 
+ * Index:
+ * Both hash tables of cuckoo algorithm are written to the file sequentially. The
  * first hash table corresponds to hash1() and the second hash table to hash2().
  * Each bucket (cell) of a hash table contains at most one entry(=file_header).
  * The size of an entry is 16 bytes.
- * 
- * hash table entries end at the following offset: 
+ *
+ * hash table entries end at the following offset:
  * 16 + hash_table_size * 2 * 16
  *
  * Checksums:
  * The last part of .pefs.checksum is filled with the actual checksums.
  * The offset where the first checksum starts is a 512 aligned address.
- * Each hash table file header entry contains an offset that points to the beginning 
+ * Each hash table file header entry contains an offset that points to the beginning
  * of a chain of checksums for that particular file's 4k blocks.
  */
 static int


More information about the svn-soc-all mailing list