socsvn commit: r237061 - soc2012/gpf/pefs_kmod/sbin/pefs
gpf at FreeBSD.org
gpf at FreeBSD.org
Mon Jun 4 13:31:35 UTC 2012
Author: gpf
Date: Mon Jun 4 13:31:32 2012
New Revision: 237061
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=237061
Log:
properly handle scenario where cuckoo insertion falls into infinite loop.
new hash tables are allocated where new_size=next_prime(old_size).
all file_header elements are kept in a 'global' tail so that we won't have to
re-produce them, should insertion fail.
Modified:
soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h
Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Mon Jun 4 12:49:21 2012 (r237060)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Mon Jun 4 13:31:32 2012 (r237061)
@@ -53,7 +53,7 @@
#include "pefs_ctl.h"
-//#define PEFS_INTEGRITY_DEBUG
+#define PEFS_INTEGRITY_DEBUG
#if defined (PEFS_INTEGRITY_DEBUG)
#define dprintf(a) printf a
#else
@@ -65,6 +65,7 @@
#define PEFS_EXTRA_TABLE_SIZE 15
TAILQ_HEAD(checksum_head, checksum);
+TAILQ_HEAD(file_header_head, file_header);
#define PEFS_CFH_SIZE 16
#define PEFS_FH_SIZE 16
@@ -89,9 +90,9 @@
uint32_t nhashes;
uint64_t file_id;
char path[MAXPATHLEN];
- LIST_ENTRY(file_header) bucket_entries;
uint32_t offset_to_checksums;
struct checksum_head checksums;
+ TAILQ_ENTRY(file_header) file_header_entries;
};
struct bucket {
@@ -132,6 +133,9 @@
{
uint32_t i;
+ if (num == 2 || num == 3)
+ return num;
+
if (num % 2 == 0)
num+=1;
@@ -262,22 +266,30 @@
chtp->buckets1 = NULL;
chtp->buckets2 = NULL;
}
-
+
static int
-pefs_allocate_hash_table(struct cuckoo_hash_table *chtp, uint32_t nelements)
+pefs_allocate_hash_table(struct cuckoo_hash_table *chtp, uint32_t nelements, char reallocate)
{
uint32_t i;
- /*
- * spending 15% more space for each table lowers the chance to fall into an
- * infinite loop during cuckoo insertion to about 1.5%.
- */
- chtp->size = pefs_next_prime(nelements + ((nelements * PEFS_EXTRA_TABLE_SIZE)/100));
- chtp->nelements = nelements;
- if (chtp->size < chtp->nelements) {
- pefs_warn("numeric overflow while computing hash table size");
- return (PEFS_ERR_GENERIC);
+ if (reallocate == 0) {
+ /*
+ * spending 15% more space for each table lowers the chance to fall into an
+ * infinite loop during cuckoo insertion to about 1.5%.
+ */
+ chtp->size = pefs_next_prime(nelements + ((nelements * PEFS_EXTRA_TABLE_SIZE)/100));
+ chtp->nelements = nelements;
+ if (chtp->size < chtp->nelements) {
+ pefs_warn("numeric overflow while computing hash table size");
+ return (PEFS_ERR_GENERIC);
+ }
}
+ else {
+ chtp->size = pefs_next_prime(chtp->size + 1);
+ free(chtp->buckets1);
+ free(chtp->buckets2);
+ }
+
dprintf(("hash table elem:%u\tsize: %u\n", chtp->nelements, chtp->size));
chtp->buckets1 = malloc (chtp->size * sizeof(struct bucket));
@@ -422,7 +434,7 @@
}
pefs_warn("cuckoo_insert resulted in infinite loop!");
- return (PEFS_ERR_CUCKOO_LOOP);
+ return (PEFS_ERR_GENERIC);
}
static int
@@ -593,19 +605,25 @@
/*
* This function creates the in memory database that will be later written to
* the checksum file.
- * A) The total sum of entries is gathered so that a hash table is allocated.
+ * A) The total sum of entries is gathered so that the hash tables are allocated.
* B) For each file entry:
* B1) semantic checks: file should reside in pefs filesystem &
* file should be regular file
* B2) the file_id is retrieved.
* B3) list of checksums is computed for the file's 4k blocks.
- * B4) file entry is added to hash table. (cuckoo hashing is used)
+ * B4) file entry is added to fh_head
+ * C) Cuckoo insertion:
+ * We try to populate our hash tables using the cuckoo algorithm. Should we fall
+ * into an infinite loop during insertion, we re-allocate larger hash tables
+ * and try again until we succeed. The possibility to fail twice in a row is
+ * 1.5% * 1.5% = 0.0225%
*/
static int
pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len,
struct cuckoo_hash_table *chtp, char *fsroot)
{
struct statfs fs;
+ struct file_header_head fh_head;
struct file_header *fhp;
int error;
uint32_t nfiles;
@@ -619,10 +637,11 @@
if (error != 0)
return (error);
- error = pefs_allocate_hash_table(chtp, nfiles);
+ error = pefs_allocate_hash_table(chtp, nfiles, 0);
if (error != 0)
return (error);
+ TAILQ_INIT(&fh_head);
while((fhp = pefs_next_file(fpin, &error)) != NULL) {
error = pefs_file_semantic_checks(fhp, &fs);
if (error != 0)
@@ -636,11 +655,26 @@
if (error != 0)
return (error);
- error = pefs_add_to_hash_table(chtp, fhp);
- if (error != 0)
- return (error);
+ TAILQ_INSERT_TAIL(&fh_head, fhp, file_header_entries);
}
+cuckoo_insert:
+ TAILQ_FOREACH(fhp, &fh_head, file_header_entries) {
+ error = pefs_add_to_hash_table(chtp, fhp);
+ /*
+ * cuckoo insertion algorithm fell into an infinite loop!
+ * Create new, larger hash tables where size = next_prime(old_size)
+ * and try again.
+ */
+ if (error != 0) {
+ dprintf(("fell into an infinite loop!\n"));
+ error = pefs_allocate_hash_table(chtp, nfiles, 1);
+ if (error != 0)
+ return (error);
+ goto cuckoo_insert;
+ }
+ }
+
pefs_print_hash_table(chtp, hash_len);
return (error);
@@ -933,11 +967,6 @@
error = pefs_create_in_memory_db(fpin, md, hash_len,
&checksum_hash_table, fsroot);
- /*
- * XXXgpf: [TODO] Properly handle PEFS_ERR_CUCKOO_LOOP by retrying with
- * larger tables (next prime number?). We shouldn't have to reread all
- * file entries btw.
- */
if (error != 0)
goto out;
Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h Mon Jun 4 12:49:21 2012 (r237060)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.h Mon Jun 4 13:31:32 2012 (r237061)
@@ -61,7 +61,6 @@
#define PEFS_ERR_NOENT 5
#define PEFS_ERR_EXIST 6
#define PEFS_ERR_INVALID 7
-#define PEFS_ERR_CUCKOO_LOOP 8
#define PEFS_FS_IGNORE_TYPE 0x0001
More information about the svn-soc-all
mailing list