socsvn commit: r237202 - soc2012/gpf/pefs_kmod/sbin/pefs

gpf at FreeBSD.org gpf at FreeBSD.org
Wed Jun 6 11:03:08 UTC 2012


Author: gpf
Date: Wed Jun  6 11:03:05 2012
New Revision: 237202
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=237202

Log:
  -hardlinks: An RB tree of 'hardlink_counter' structs, keyed by inode, is used.
  This tree is used to print warnings to the user when the number of links found
  in the input list for a specific inode is less than that inode's total link count.
  

Modified:
  soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
  soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c

Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Wed Jun  6 10:56:59 2012	(r237201)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c	Wed Jun  6 11:03:05 2012	(r237202)
@@ -32,6 +32,7 @@
 #include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/queue.h>
+#include <sys/tree.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/fnv_hash.h>
@@ -67,20 +68,32 @@
 
 TAILQ_HEAD(checksum_head, checksum);
 TAILQ_HEAD(file_header_head, file_header);
+TAILQ_HEAD(hardlink_fh_head, file_header);
+
+RB_HEAD(hardlink_head, hardlink_counter);
+RB_PROTOTYPE(hardlink_head, hardlink_counter, hardlink_entries, pefs_rb_cmp);
 
 #define PEFS_CFH_SIZE 16
 #define PEFS_FH_SIZE 16
 
+struct hardlink_counter {
+	ino_t inode;
+	uint32_t total_links;
+	uint32_t links_found;
+	struct hardlink_fh_head file_headers;
+	RB_ENTRY(hardlink_counter) hardlink_entries;
+};
+
 /* XXXgpf: [TODO] check pathname string lengths. Some are MAXPATHLEN + 1, some MAXPATHLEN */
 
 /* XXXgpf: unions for on disk structs and move to a different header? */
 struct checksum_file_header {
-        uint8_t version;
-        uint8_t reserved;
-        uint8_t hash_len;
-        uint8_t hash_algo[8];
-        uint8_t offset_to_hash_table;
-        uint32_t hash_table_size;
+	uint8_t version;
+	uint8_t reserved;
+	uint8_t hash_len;
+	uint8_t hash_algo[8];
+	uint8_t offset_to_hash_table;
+	uint32_t hash_table_size;
 };
 
 struct checksum {
@@ -97,6 +110,7 @@
 	uint32_t offset_to_checksums;
 	struct checksum_head checksums;
 	TAILQ_ENTRY(file_header) file_header_entries;
+	TAILQ_ENTRY(file_header) fh_hardlink_entries;
 };
 
 struct bucket {
@@ -471,6 +485,7 @@
 		if (fhp != NULL) {
 			//dprintf(("\tpath=%s\tid = %llu\tnhashes = %d\n", fhp->path, fhp->file_id, fhp->nhashes));
 			dprintf(("\tid = %llu\tnhashes = %d\n", fhp->file_id, fhp->nhashes));
+			dprintf(("\tpath = %s\n", fhp->path));
 			TAILQ_FOREACH(csp, &(fhp->checksums), checksum_entries) {
 				dprintf(("\t\tdigest="));
 				for (j = 0; j < hash_len; j++)
@@ -548,8 +563,87 @@
 	return (error);
 }
 
+/* XXXgpf: for debugging purposes */
+static void
+pefs_rb_print(struct hardlink_head *hlc_headp)
+{
+	struct hardlink_counter *hlcp;
+	struct file_header *fhp;
+
+	dprintf(("\n+++Printing RB tree+++\n\n"));
+	RB_FOREACH(hlcp, hardlink_head, hlc_headp) {
+		dprintf(("inode %d\ttotal links %d\tlinks found %d\n",
+			hlcp->inode, hlcp->total_links, hlcp->links_found));
+		TAILQ_FOREACH(fhp, &(hlcp->file_headers), fh_hardlink_entries) {
+			dprintf(("\tpath: %s\n", fhp->path));
+		}
+	}
+}
+
+static void
+pefs_rb_warn(struct hardlink_head *hlc_headp)
+{
+	struct hardlink_counter *hlcp;
+	struct file_header *fhp;
+	int i;
+
+	RB_FOREACH(hlcp, hardlink_head, hlc_headp) {
+		if (hlcp->total_links > hlcp->links_found) {
+			pefs_warn("%d hard links of total %d were found in input list for file with inode: %d",
+				hlcp->links_found, hlcp->total_links, hlcp->inode);
+			i = 1;
+			TAILQ_FOREACH(fhp, &(hlcp->file_headers), fh_hardlink_entries) {
+				pefs_warn("link %d: %s", i++, fhp->path);
+			}
+		}
+	}
+}
+
+/* XXXgpf: [TODO] comments */
+static int
+pefs_rb_insert(struct hardlink_head *hlc_headp, struct file_header *fhp, struct stat *sbp)
+{
+	struct hardlink_counter find, *res, *new_hlcp;
+
+	find.inode = sbp->st_ino;
+	res = RB_FIND(hardlink_head, hlc_headp, &find);
+
+	if (res != NULL) {
+		res->links_found++;
+		TAILQ_INSERT_TAIL(&(res->file_headers), fhp, fh_hardlink_entries);
+	}
+	else {
+		new_hlcp = malloc(sizeof(struct hardlink_counter));
+		if (new_hlcp == NULL) {
+			warn("memory allocation error");
+			return (PEFS_ERR_SYS);
+		}
+
+		new_hlcp->inode = sbp->st_ino;
+		new_hlcp->total_links = sbp->st_nlink;
+		new_hlcp->links_found = 1;
+		TAILQ_INIT(&(new_hlcp->file_headers));
+		TAILQ_INSERT_TAIL(&(new_hlcp->file_headers), fhp, fh_hardlink_entries);
+
+		RB_INSERT(hardlink_head, hlc_headp, new_hlcp);
+	}
+
+	return (0);
+}
+
 static int
-pefs_file_semantic_checks(struct file_header *fhp, struct statfs *fsp)
+pefs_rb_cmp(struct hardlink_counter *hlcp1, struct hardlink_counter *hlcp2)
+{
+	if (hlcp1->inode < hlcp2->inode)
+		return -1;
+	else if (hlcp1->inode > hlcp2->inode)
+		return 1;
+	else
+		return 0;
+}
+
+static int
+pefs_file_semantic_checks(struct file_header *fhp, struct statfs *fsp, struct hardlink_head *hlc_headp)
 {
 	char parent_dir[MAXPATHLEN];
 	char sbuf[MAXPATHLEN];
@@ -647,6 +741,11 @@
 			fhp->path, fsp->f_mntonname);
 		return (PEFS_ERR_INVALID);
 	}
+
+	/* Keep all hardlink file headers in a rb tree */
+	if (sb.st_nlink > 1)
+		return (pefs_rb_insert(hlc_headp, fhp, &sb));
+
 	return (0);
 }
 
@@ -687,16 +786,24 @@
  * the checksum file.
  * A) The total sum of entries is gathered so that the hash tables are allocated.
  * B) For each file entry:
- * 		B1) semantic checks: file should reside in pefs filesystem &
- * 			file should be regular file
+ * 		B1) semantic checks: 
+ * 			B1a) file should reside in pefs filesystem & file should be regular file. 
+ * 			B1b) if symlink, acquire and save the absolute path of the symlink's 
+ * 				target. Try to stat() the target but don't do anything else.
+ * 			B1c) If hardlink, save a reference to this file entry in our rb tree.
+ * 			rb-tree uses inodes as keys and is used in part C to print warnings.
  * 		B2) the file_id is retrieved.
  * 		B3) list of checksums is computed for the file's 4k blocks.
- * 		B4) file entry is added to fh_head
- * C) Cuckoo insertion:
+ * 		B4) file entry is added to universal fh_head.
+ * C) Print warnings for hardlinks if the number of links found in the input list is
+ * less than the inode's total number of links.
+ * D) Cuckoo insertion:
  * We try to populate our hash tables using the cuckoo algorithm. Should we fall
  * into an infinite loop during insertion, we re-allocate larger hash tables
  * and try again until we succeed. The possibility to fail twice in a row is
  * 1.5% * 1.5% = 0.0225%
+ *
+ * XXXgpf: [TODO] more comments
  */
 static int
 pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len,
@@ -704,6 +811,7 @@
 {
 	struct statfs fs;
 	struct file_header_head fh_head;
+	struct hardlink_head hlc_head;
 	struct file_header *fhp;
 	int error;
 	uint32_t nfiles;
@@ -722,8 +830,9 @@
 		return (error);
 
 	TAILQ_INIT(&fh_head);
+	RB_INIT(&hlc_head);
 	while((fhp = pefs_next_file(fpin, &error)) != NULL) {
-		error = pefs_file_semantic_checks(fhp, &fs);
+		error = pefs_file_semantic_checks(fhp, &fs, &hlc_head);
 		if (error != 0)
 			return (error);
 
@@ -742,6 +851,12 @@
 	if (error != 0)
 		return (error);
 
+	pefs_rb_print(&hlc_head);
+	pefs_rb_warn(&hlc_head);
+	/*
+	 * XXXgpf: [TODO] print warnings for dem hardlinks
+	 */
+
 cuckoo_insert:
 	TAILQ_FOREACH(fhp, &fh_head, file_header_entries) {
 		error = pefs_add_to_hash_table(chtp, fhp);
@@ -1067,3 +1182,5 @@
 
 	return (error);
 }
+
+RB_GENERATE(hardlink_head, hardlink_counter, hardlink_entries, pefs_rb_cmp);

Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c	Wed Jun  6 10:56:59 2012	(r237201)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c	Wed Jun  6 11:03:05 2012	(r237202)
@@ -1032,6 +1032,12 @@
 	int error, i, j;
 	const char *algo;
 
+	/*
+	 * XXXgpf: [TODO] All input file entries are now kept in a 'global' tail queue, and
+	 * insertion into the hash table occurs after all of them are read/parsed. Therefore,
+	 * it is possible to have fpin = stdin by default and not require an input file: we
+	 * no longer go through the input list twice, which would have required a rewind().
+	 */
 	fpin = NULL;
 	/* by default use sha256 */
 	algo = supported_digests[0];


More information about the svn-soc-all mailing list