socsvn commit: r236882 - soc2012/gpf/pefs_kmod/sbin/pefs

Fri Jun 1 16:30:53 UTC 2012

Author: gpf
Date: Fri Jun  1 16:30:51 2012
New Revision: 236882
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=236882

Log:
  replace simple separate hashing with cuckoo hashing for .pefs.checksum.
  

Modified:
  soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
  soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h

Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
==============================================================================

--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Fri Jun  1 16:29:59 2012	(r236881)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Fri Jun  1 16:30:51 2012	(r236882)
@@ -38,6 +38,7 @@
 #include <ctype.h>
 #include <dirent.h>
 #include <inttypes.h>
+#include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -61,13 +62,12 @@
 
 #define PEFS_CHECKSUM_FILE_VERSION 0xDD
 #define PEFS_HASH_BYTE_ALIGNMENT 512
+#define PEFS_EXTRA_TABLE_SIZE 0.15
 
-LIST_HEAD(file_header_head, file_header);
 TAILQ_HEAD(checksum_head, checksum);
 
 #define PEFS_CFH_SIZE 16
 #define PEFS_FH_SIZE 16
-#define PEFS_BUCKET_SIZE 8
 
 /* XXXgpf: unions for on disk structs and move to a different header? */
 struct checksum_file_header {
@@ -95,18 +95,49 @@
 };
 
 struct bucket {
-	struct file_header_head file_headers;
-	uint32_t offset_to_chain;
-	uint32_t nelements;
+	struct file_header * fhp;
 };
 
-struct hash_table {
-	struct bucket *buckets;
-	uint32_t size; /* how many buckets */
+/*
+ * This cuckoo hashing implementation requires 2 tables, each
+ * with his one hash function: pefs_hash1() & pefs_hash2()
+ */
+struct cuckoo_hash_table {
+	struct bucket *buckets1;
+	struct bucket *buckets2;
+	uint32_t size; /* how many buckets in each table */
 	uint32_t nelements;
 };
 
 static int
+pefs_is_prime(uint32_t num)
+{
+	int i;
+
+	if (num == 0)
+		return 1;
+
+	/* XXXgpf: [TODO] Take a look at arithmetics, comparisons between signed/unsigned etc */
+	for (i = 2; i <= sqrt(num); i++)  {
+		if (num % i == 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+static int
+pefs_next_prime(uint32_t num)
+{
+	uint32_t i;
+
+	for (i = num;; i++)  {
+		if (pefs_is_prime(i))
+			return i;
+	}
+}
+
+static int
 pefs_compute_file_checksums(struct file_header *fhp, const EVP_MD *md,
 	uint8_t hash_len)
 {
@@ -127,6 +158,7 @@
 		return (PEFS_ERR_SYS);
 	}
 
+	/* XXXgpf: shouldn't we also check for empty files? */
 	resid = sb.st_size;
 
 	fd = open(fhp->path, O_RDONLY);
@@ -158,9 +190,9 @@
 		EVP_DigestUpdate(&mdctx, xsct.pxsct_ctext, xsct.pxsct_ctext_len);
 
 		dprintf(("read %d bytes from kernel\n\n", bytes_to_read));
-		dprintf(("printing contents of buffer:"));
-		for (i=0; i < (int)bytes_to_read; i++) dprintf(("%c", xsct.pxsct_ctext[i]));
-		dprintf(("!\n"));
+		//dprintf(("printing contents of buffer:"));
+		//for (i=0; i < (int)bytes_to_read; i++) dprintf(("%c", xsct.pxsct_ctext[i]));
+		//dprintf(("!\n"));
 
 		csp = malloc(sizeof(struct checksum));
 		if (csp == NULL) {
@@ -201,6 +233,7 @@
 	nfiles = 0;
 
 	while (fgets(buf, sizeof(buf), fpin) != NULL) {
+		/* XXXgpf: [TODO] check for numeric overflow */
 		nfiles++;
 	}
 
@@ -216,127 +249,205 @@
 }
 
 static void
-pefs_init_hash_table(struct hash_table *checksum_hash_tablep)
+pefs_init_hash_table(struct cuckoo_hash_table *chtp)
 {
-	checksum_hash_tablep->size = 0;
-	checksum_hash_tablep->nelements = 0;
-	checksum_hash_tablep->buckets = NULL;
+	chtp->size = 0;
+	chtp->nelements = 0;
+	chtp->buckets1 = NULL;
+	chtp->buckets2 = NULL;
 }
 
 static int
-pefs_allocate_hash_table(struct hash_table *checksum_hash_tablep, uint32_t nelements)
+pefs_allocate_hash_table(struct cuckoo_hash_table *chtp, uint32_t nelements)
 {
 	uint32_t i;
 
-	/*
-	 * XXXgpf: needs optimization
+	/* 
+	 * spending 15% more space for each table lowers the chance to fall into an 
+	 * infinite loop during cuckoo insertion to about 1.5%.
 	 */
-	checksum_hash_tablep->size = nelements;
-	checksum_hash_tablep->nelements = nelements;
-	checksum_hash_tablep->buckets = malloc (nelements * sizeof(struct bucket));
+	chtp->size = pefs_next_prime(nelements + (nelements * PEFS_EXTRA_TABLE_SIZE));
+	chtp->nelements = nelements;
+	if (chtp->size < chtp->nelements) {
+		pefs_warn("numeric overflow while computing hash table size");
+		return (PEFS_ERR_GENERIC);
+	}
+	dprintf(("hash table elem:%u\tsize: %u\n", chtp->nelements, chtp->size));
 
-	if (checksum_hash_tablep->buckets == NULL) {
+	chtp->buckets1 = malloc (chtp->size * sizeof(struct bucket));
+	if (chtp->buckets1 == NULL) {
 		pefs_warn("memory allocation error");
 		return (PEFS_ERR_SYS);
 	}
 
-	for (i = 0; i < checksum_hash_tablep->size; i++) {
-		checksum_hash_tablep->buckets[i].nelements = 0;
-		LIST_INIT(&(checksum_hash_tablep->buckets[i].file_headers));
+	for (i = 0; i < chtp->size; i++)
+		chtp->buckets1[i].fhp = NULL;
+
+	chtp->buckets2 = malloc (chtp->size * sizeof(struct bucket));
+	if (chtp->buckets2 == NULL) {
+		pefs_warn("memory allocation error");
+		return (PEFS_ERR_SYS);
 	}
 
+	for (i = 0; i < chtp->size; i++)
+		chtp->buckets2[i].fhp = NULL;
+
 	return (0);
 }
 
 static void
-pefs_free_hash_table(struct hash_table *checksum_hash_tablep)
+pefs_free_hash_table(struct cuckoo_hash_table *chtp)
 {
 	struct bucket *bp;
-	struct file_header *fhp, *tfhp;
+	struct file_header *fhp;
 	struct checksum *csp, *tcsp;
 	uint32_t i;
 
-	if (checksum_hash_tablep->buckets != NULL) {
-		for (i = 0; i < checksum_hash_tablep->size; i++) {
-			bp = &checksum_hash_tablep->buckets[i];
-			LIST_FOREACH_SAFE(fhp, &(bp->file_headers), bucket_entries, tfhp) {
+	if (chtp->buckets1 != NULL) {
+		for (i = 0; i < chtp->size; i++) {
+			bp = &chtp->buckets1[i];
+			fhp = bp->fhp;
+			if (fhp != NULL) {
+				TAILQ_FOREACH_SAFE(csp, &(fhp->checksums), checksum_entries, tcsp) {
+					TAILQ_REMOVE(&(fhp->checksums), csp, checksum_entries);
+					if (csp->hash != NULL)
+						free(csp->hash);
+					free(csp);
+				}
+				free(fhp);
+			}
+		}
+		free(chtp->buckets1);
+	}
+
+	if (chtp->buckets2 != NULL) {
+		for (i = 0; i < chtp->size; i++) {
+			bp = &chtp->buckets2[i];
+			fhp = bp->fhp;
+			if (fhp != NULL) {
 				TAILQ_FOREACH_SAFE(csp, &(fhp->checksums), checksum_entries, tcsp) {
 					TAILQ_REMOVE(&(fhp->checksums), csp, checksum_entries);
 					if (csp->hash != NULL)
 						free(csp->hash);
 					free(csp);
 				}
-				LIST_REMOVE(fhp, bucket_entries);
 				free(fhp);
 			}
 		}
-		free(checksum_hash_tablep->buckets);
+		free(chtp->buckets2);
 	}
 }
 
-static int
-pefs_add_to_bucket(struct bucket *bucketp, struct file_header *fhp)
+static uint32_t
+pefs_hash1(struct cuckoo_hash_table *chtp, struct file_header *fhp)
 {
-	struct file_header *elementp;
-	uint32_t i;
-
-	i = 1;
+	uint32_t nbucket;
 
-	if (bucketp->nelements == 0)
-		LIST_INSERT_HEAD(&(bucketp->file_headers), fhp, bucket_entries);
-	else
-		LIST_FOREACH(elementp, &(bucketp->file_headers), bucket_entries) {
-			if (elementp->file_id == fhp->file_id) {
-				warn("file identifier collision detected between files: %s & %s",
-						fhp->path, elementp->path);
-				return (PEFS_ERR_EXIST);
-			}
+	nbucket = fhp->file_id % chtp->size;
+	dprintf(("hash1: goto bucket %d\n", nbucket));
+	return (nbucket);
+}
 
-			if (fhp->file_id < elementp->file_id) {
-				LIST_INSERT_BEFORE(elementp, fhp, bucket_entries);
-				break;
-			}
-			else if (i++ == bucketp->nelements) {
-				LIST_INSERT_AFTER(elementp, fhp, bucket_entries);
-				break;
-			}
-		}
+static uint32_t
+pefs_hash2(struct cuckoo_hash_table *chtp, struct file_header *fhp)
+{
+	uint32_t nbucket;
 
-	bucketp->nelements++;
-	return (0);
+	nbucket = (fhp->file_id / chtp->size) % chtp->size;
+	dprintf(("hash2: goto bucket %d\n", nbucket));
+	return (nbucket);
 }
 
-static struct bucket *
-pefs_find_bucket(struct hash_table *checksum_hash_tablep, struct file_header *fhp)
+static int
+pefs_cuckoo_insert(struct cuckoo_hash_table *chtp,
+	struct file_header *fhp)
 {
-	uint32_t nbucket;
+	struct file_header *elem, *elem1, *elem2;
+	uint32_t i, max_tries, pos1, pos2;
+
+	max_tries = chtp->size;
+	elem = fhp;
 
-	nbucket = fhp->file_id % checksum_hash_tablep->size;
-	dprintf(("goto bucket %d\n", nbucket));
-	return (&(checksum_hash_tablep->buckets[nbucket]));
+	/* file_id collision check */
+	pos1 = pefs_hash1(chtp, elem);
+	elem1 = chtp->buckets1[pos1].fhp;
+	pos2 = pefs_hash2(chtp, elem);
+	elem2 = chtp->buckets2[pos2].fhp;
+	if (elem1 != NULL) {
+		if (elem1->file_id == fhp->file_id) {
+			pefs_warn("file identifier collision detected between files: %s & %s",
+					fhp->path, elem1->path);
+			return (PEFS_ERR_EXIST);
+		}
+	}
+
+	if (elem2 != NULL) {
+		if (elem2->file_id == fhp->file_id) {
+			pefs_warn("file identifier collision detected between files: %s & %s",
+					fhp->path, elem2->path);
+			return (PEFS_ERR_EXIST);
+		}
+	}
+
+	for (i = 0; i < max_tries; i++) {
+		pos1 = pefs_hash1(chtp, elem);
+		elem1 = chtp->buckets1[pos1].fhp;
+		/* do the cuckoo! */
+		chtp->buckets1[pos1].fhp = elem;
+		if (elem1 == NULL)
+			return 0;
+		pos2 = pefs_hash2(chtp, elem1);
+		elem2 = chtp->buckets2[pos2].fhp;
+		/* do the cuckoo! */
+		chtp->buckets2[pos2].fhp = elem1;
+		if (elem2 == NULL)
+			return 0;
+		else
+			elem = elem2;
+	}
+
+	pefs_warn("cuckoo_insert resulted in infinite loop!");
+	return (PEFS_ERR_CUCKOO_LOOP);
 }
 
 static int
-pefs_add_to_hash_table(struct hash_table *checksum_hash_tablep,
+pefs_add_to_hash_table(struct cuckoo_hash_table *chtp,
 	struct file_header *fhp)
 {
-	return (pefs_add_to_bucket(pefs_find_bucket(checksum_hash_tablep, fhp), fhp));
+	return (pefs_cuckoo_insert(chtp, fhp));
 }
 
 /* for debugging purposes */
 static void
-pefs_print_hash_table(struct hash_table *checksum_hash_tablep, uint8_t hash_len)
+pefs_print_hash_table(struct cuckoo_hash_table *chtp, uint8_t hash_len)
 {
 	struct file_header *fhp;
 	struct checksum *csp;
 	uint32_t i,j;
 
-	dprintf(("\n+++Printing Hash Table+++\n\n"));
-	for (i = 0; i < checksum_hash_tablep->size; i++) {
-		dprintf(("\nbucket %d with elements: %u\n", i, checksum_hash_tablep->buckets[i].nelements));
-		LIST_FOREACH(fhp, &(checksum_hash_tablep->buckets[i].file_headers), bucket_entries) {
-			//printf(("\tpath=%s!\t id = %d!\tnhashes = %d\n", fhp->path, (int)fhp->file_id, fhp->nhashes));
-			dprintf(("\tid = %llu!\tnhashes = %d\n", fhp->file_id, fhp->nhashes));
+	dprintf(("\n+++Printing Hash Table 1+++\n\n"));
+	for (i = 0; i < chtp->size; i++) {
+		fhp = chtp->buckets1[i].fhp;
+		dprintf(("\nbucket %d with element: %d\n", i, fhp == NULL ? 0 : 1));
+		if (fhp != NULL) {
+			//dprintf(("\tpath=%s\tid = %llu\tnhashes = %d\n", fhp->path, fhp->file_id, fhp->nhashes));
+			dprintf(("\tid = %llu\tnhashes = %d\n", fhp->file_id, fhp->nhashes));
+			TAILQ_FOREACH(csp, &(fhp->checksums), checksum_entries) {
+				dprintf(("\t\tdigest="));
+				for (j = 0; j < hash_len; j++)
+					dprintf(("%02x", csp->hash[j]));
+				dprintf(("\n"));
+			}
+		}
+	}
+
+	dprintf(("\n+++Printing Hash Table 2+++\n\n"));
+	for (i = 0; i < chtp->size; i++) {
+		fhp = chtp->buckets2[i].fhp;
+		dprintf(("\nbucket %d with element: %d\n", i, fhp == NULL ? 0 : 1));
+		if (fhp != NULL) {
+			//dprintf(("\tpath=%s\tid = %llu\tnhashes = %d\n", fhp->path, fhp->file_id, fhp->nhashes));
+			dprintf(("\tid = %llu\tnhashes = %d\n", fhp->file_id, fhp->nhashes));
 			TAILQ_FOREACH(csp, &(fhp->checksums), checksum_entries) {
 				dprintf(("\t\tdigest="));
 				for (j = 0; j < hash_len; j++)
@@ -477,7 +588,7 @@
  */
 static int
 pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len,
-	struct hash_table *checksum_hash_tablep, char *fsroot)
+	struct cuckoo_hash_table *chtp, char *fsroot)
 {
 	struct statfs fs;
 	struct file_header *fhp;
@@ -493,7 +604,7 @@
 	if (error != 0)
 		return (error);
 
-	error = pefs_allocate_hash_table(checksum_hash_tablep, nfiles);
+	error = pefs_allocate_hash_table(chtp, nfiles);
 	if (error != 0)
 		return (error);
 
@@ -510,12 +621,12 @@
 		if (error != 0)
 			return (error);
 
-		error = pefs_add_to_hash_table(checksum_hash_tablep, fhp);
+		error = pefs_add_to_hash_table(chtp, fhp);
 		if (error != 0)
 			return (error);
 	}
 
-	pefs_print_hash_table(checksum_hash_tablep, hash_len);
+	pefs_print_hash_table(chtp, hash_len);
 
 	return (error);
 }
@@ -568,69 +679,65 @@
 	return (0);
 }
 
-static int
-pefs_write_bucket(int fdout, struct bucket *bp, uint32_t *buckets_offset)
-{
-	uint32_t offset_to_chain, nelements;
-	int bytes;
-
-	offset_to_chain = htole32(bp->offset_to_chain);
-	bytes = pwrite(fdout, &offset_to_chain, sizeof(offset_to_chain), *buckets_offset);
-	if (bytes != sizeof(offset_to_chain)) {
-		warn("error writing to .pefs.checksum");
-		return (PEFS_ERR_IO);
-	}
-	(*buckets_offset)+= sizeof(offset_to_chain);
-
-	nelements = htole32(bp->nelements);
-	bytes = pwrite(fdout, &nelements, sizeof(nelements), *buckets_offset);
-	if (bytes != sizeof(nelements)) {
-		warn("error writing to .pefs.checksum");
-		return (PEFS_ERR_IO);
-	}
-	(*buckets_offset)+= sizeof(nelements);
-
-	return (0);
-}
-
 /*
  * XXXgpf: [TODO] take a look at chained offsets
  */
 static int
-pefs_write_file_header(int fdout, struct file_header *fhp, uint32_t *fh_offset)
+pefs_write_file_header(int fdout, struct file_header *fhp, uint32_t *buckets_offset)
 {
 	uint64_t file_id;
 	uint32_t nhashes, offset_to_checksums;
 	int bytes;
 
 	nhashes = htole32(fhp->nhashes);
-	bytes = pwrite(fdout, &nhashes, sizeof(nhashes), *fh_offset);
+	bytes = pwrite(fdout, &nhashes, sizeof(nhashes), *buckets_offset);
 	if (bytes != sizeof(nhashes)) {
 		warn("error writing to .pefs.checksum");
 		return (PEFS_ERR_IO);
 	}
-	(*fh_offset)+= sizeof(nhashes);
+	(*buckets_offset)+= sizeof(nhashes);
 
 	offset_to_checksums = htole32(fhp->offset_to_checksums);
-	bytes = pwrite(fdout, &offset_to_checksums, sizeof(offset_to_checksums), *fh_offset);
+	bytes = pwrite(fdout, &offset_to_checksums, sizeof(offset_to_checksums), *buckets_offset);
 	if (bytes != sizeof(offset_to_checksums)) {
 		warn("error writing to .pefs.checksum");
 		return (PEFS_ERR_IO);
 	}
-	(*fh_offset)+= sizeof(offset_to_checksums);
+	(*buckets_offset)+= sizeof(offset_to_checksums);
 
 	file_id = htole64(fhp->file_id);
-	bytes = pwrite(fdout, &file_id, sizeof(file_id), *fh_offset);
+	bytes = pwrite(fdout, &file_id, sizeof(file_id), *buckets_offset);
 	if (bytes != sizeof(file_id)) {
 		warn("error writing to .pefs.checksum");
 		return (PEFS_ERR_IO);
 	}
-	(*fh_offset)+= sizeof(file_id);
+	(*buckets_offset)+= sizeof(file_id);
 
 	return (0);
 }
 
 static int
+pefs_write_bucket(int fdout, struct bucket *bp, uint32_t *buckets_offset)
+{
+	struct file_header emptyfh;
+	struct file_header *fhp;
+
+	fhp = bp->fhp;
+	if (fhp == NULL) {
+		/*
+		 * XXXgpf: empty files are not allowed so nhashes == 0 symbolizes an empty bucket.
+		 * perhaps a bitmap would be better? or we could steal a bit from some data member?
+		 */
+		emptyfh.nhashes = 0;
+		emptyfh.file_id = 0;
+		emptyfh.offset_to_checksums = 0;
+		fhp = &emptyfh;
+	}
+
+	return (pefs_write_file_header(fdout, fhp, buckets_offset));
+}
+
+static int
 pefs_write_hash(int fdout, struct checksum *csp, uint32_t *hashes_offset, uint8_t hash_len)
 {
 	int bytes;
@@ -649,16 +756,32 @@
  * All data member writes are done separately so as to avoid alignment problems.
  * Writes are always in little endian byte order.
  *
- * XXXgpf: [TODO] more comments about internal structure of file. This should probably
- * be done after design crystalizes (cuckoo hashing? embed? etc).
+ * First 16 bytes of .pefs.checksum are filled with .pefs.checksum's file header.
+ * Right after this header lies the 'index' part of our database. This index is later 
+ * kept in kernel memory.
+ *
+ * Index: 
+ * Both hash tables of cuckoo algorithm are written to the file sequentially. The 
+ * first hash table corresponds to hash1() and the second hash table to hash2().
+ * Each bucket (cell) of a hash table contains at most one entry(=file_header).
+ * The size of an entry is 16 bytes.
+ * 
+ * hash table entries end at the following offset: 
+ * 16 + hash_table_size * 2 * 16
+ *
+ * Checksums:
+ * The last part of .pefs.checksum is filled with the actual checksums.
+ * The offset where the first checksum starts is a 512 aligned address.
+ * Each hash table file header entry contains an offset that points to the beginning 
+ * of a chain of checksums for that particular file's 4k blocks.
  */
 static int
-pefs_write_checksum_file(int fdout, struct checksum_file_header *cfhp, struct hash_table *chtp)
+pefs_write_checksum_file(int fdout, struct checksum_file_header *cfhp, struct cuckoo_hash_table *chtp)
 {
 	struct bucket *bp;
 	struct checksum *csp;
 	struct file_header *fhp;
-	uint32_t i, buckets_offset, fh_offset, hashes_offset;
+	uint32_t i, buckets_offset, hashes_offset;
 	int error;
 
 	error = pefs_write_checksum_file_header(fdout, cfhp);
@@ -668,29 +791,40 @@
 	/* this points to where the buckets start */
 	buckets_offset = cfhp->offset_to_hash_table;
 
-	/* this points to where the buckets stop and the file headers start */
-	fh_offset = buckets_offset;
-	fh_offset+= chtp->size * PEFS_BUCKET_SIZE;
-
-	/* this points to where the file headers stop and the checksums start */
-	hashes_offset = fh_offset;
-	hashes_offset+= chtp->nelements * PEFS_FH_SIZE;
+	/* this points to where the buckets stop and the checksums start */
+	hashes_offset = buckets_offset;
+	hashes_offset+= chtp->size * PEFS_FH_SIZE * 2;
 	if (hashes_offset % PEFS_HASH_BYTE_ALIGNMENT != 0)
 		hashes_offset+= PEFS_HASH_BYTE_ALIGNMENT - (hashes_offset % PEFS_HASH_BYTE_ALIGNMENT);
 
 	for (i = 0; i < chtp->size; i++) {
-		bp = &chtp->buckets[i];
-		bp->offset_to_chain = fh_offset;
+		bp = &chtp->buckets1[i];
+		if (bp->fhp != NULL)
+			bp->fhp->offset_to_checksums = hashes_offset;
 		error = pefs_write_bucket(fdout, bp, &buckets_offset);
 		if (error != 0)
 			return (error);
 
-		LIST_FOREACH(fhp, &(chtp->buckets[i].file_headers), bucket_entries) {
-			fhp->offset_to_checksums = hashes_offset;
-			error = pefs_write_file_header(fdout, fhp, &fh_offset);
-			if (error != 0)
-				return (error);
+		fhp = bp->fhp;
+		if (fhp != NULL) {
+			TAILQ_FOREACH(csp, &(fhp->checksums), checksum_entries) {
+				error = pefs_write_hash(fdout, csp, &hashes_offset, cfhp->hash_len);
+				if (error != 0)
+					return (error);
+			}
+		}
+	}
 
+	for (i = 0; i < chtp->size; i++) {
+		bp = &chtp->buckets2[i];
+		if (bp->fhp != NULL)
+			bp->fhp->offset_to_checksums = hashes_offset;
+		error = pefs_write_bucket(fdout, bp, &buckets_offset);
+		if (error != 0)
+			return (error);
+
+		fhp = bp->fhp;
+		if (fhp != NULL) {
 			TAILQ_FOREACH(csp, &(fhp->checksums), checksum_entries) {
 				error = pefs_write_hash(fdout, csp, &hashes_offset, cfhp->hash_len);
 				if (error != 0)
@@ -704,7 +838,7 @@
 
 static void
 pefs_init_checksum_file_header(struct checksum_file_header *cfhp, const char *algo,
-		uint8_t hash_len, struct hash_table *chtp)
+		uint8_t hash_len, struct cuckoo_hash_table *chtp)
 {
 	cfhp->hash_len = hash_len;
 	cfhp->hash_table_size = chtp->size;
@@ -761,7 +895,7 @@
 int
 pefs_create_checksum_file(FILE *fpin, char *fsroot, char *csm_path, const char *algo)
 {
-	struct hash_table checksum_hash_table;
+	struct cuckoo_hash_table checksum_hash_table;
 	struct checksum_file_header cfh;
 	const EVP_MD *md;
 	int error, fdout;
@@ -784,6 +918,11 @@
 
 	error = pefs_create_in_memory_db(fpin, md, hash_len,
 		&checksum_hash_table, fsroot);
+	/*
+	 * XXXgpf: [TODO] Properly handle PEFS_ERR_CUCKOO_LOOP by retrying with
+	 * larger tables (next prime number?). We shouldn't have to reread all
+	 * file entries btw.
+	 */
 	if (error != 0)
 		goto out;
 

Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h	Fri Jun  1 16:29:59 2012	(r236881)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h	Fri Jun  1 16:30:51 2012	(r236882)
@@ -61,6 +61,7 @@
 #define	PEFS_ERR_NOENT			5
 #define	PEFS_ERR_EXIST			6
 #define	PEFS_ERR_INVALID		7
+#define PEFS_ERR_CUCKOO_LOOP	8
 
 #define	PEFS_FS_IGNORE_TYPE		0x0001