socsvn commit: r237202 - soc2012/gpf/pefs_kmod/sbin/pefs
gpf at FreeBSD.org
gpf at FreeBSD.org
Wed Jun 6 11:03:08 UTC 2012
Author: gpf
Date: Wed Jun 6 11:03:05 2012
New Revision: 237202
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=237202
Log:
-hardlinks: An rb tree of 'hardlink_counter' structs is used with inodes as keys.
This tree is used to print warnings to the user when the number of links found
for a specific inode is less than the total number of links.
Modified:
soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c
Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Wed Jun 6 10:56:59 2012 (r237201)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_checksum.c Wed Jun 6 11:03:05 2012 (r237202)
@@ -32,6 +32,7 @@
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/queue.h>
+#include <sys/tree.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/fnv_hash.h>
@@ -67,20 +68,32 @@
TAILQ_HEAD(checksum_head, checksum);
TAILQ_HEAD(file_header_head, file_header);
+TAILQ_HEAD(hardlink_fh_head, file_header);
+
+RB_HEAD(hardlink_head, hardlink_counter);
+RB_PROTOTYPE(hardlink_head, hardlink_counter, hardlink_entries, pefs_rb_cmp);
#define PEFS_CFH_SIZE 16
#define PEFS_FH_SIZE 16
+struct hardlink_counter { /* one RB tree node per inode that has more than one link */
+ ino_t inode; /* tree key; copied from st_ino */
+ uint32_t total_links; /* st_nlink of the inode at the time the node was created */
+ uint32_t links_found; /* how many input entries have referred to this inode so far */
+ struct hardlink_fh_head file_headers; /* file headers sharing this inode, chained via fh_hardlink_entries */
+ RB_ENTRY(hardlink_counter) hardlink_entries; /* linkage inside the hardlink_head RB tree */
+};
+
/* XXXgpf: [TODO] check pathname string lengths. Some are MAXPATHLEN + 1, some MAXPATHLEN */
/* XXXgpf: unions for on disk structs and move to a different header? */
struct checksum_file_header { /* NOTE(review): presumably one of the on-disk structs mentioned in the XXXgpf note - confirm */
- uint8_t version;
- uint8_t reserved;
- uint8_t hash_len;
- uint8_t hash_algo[8];
- uint8_t offset_to_hash_table;
- uint32_t hash_table_size;
+ uint8_t version;
+ uint8_t reserved;
+ uint8_t hash_len;
+ uint8_t hash_algo[8];
+ uint8_t offset_to_hash_table;
+ uint32_t hash_table_size;
};
struct checksum {
@@ -97,6 +110,7 @@
uint32_t offset_to_checksums;
struct checksum_head checksums;
TAILQ_ENTRY(file_header) file_header_entries;
+ TAILQ_ENTRY(file_header) fh_hardlink_entries;
};
struct bucket {
@@ -471,6 +485,7 @@
if (fhp != NULL) {
//dprintf(("\tpath=%s\tid = %llu\tnhashes = %d\n", fhp->path, fhp->file_id, fhp->nhashes));
dprintf(("\tid = %llu\tnhashes = %d\n", fhp->file_id, fhp->nhashes));
+ dprintf(("\tpath = %s\n", fhp->path));
TAILQ_FOREACH(csp, &(fhp->checksums), checksum_entries) {
dprintf(("\t\tdigest="));
for (j = 0; j < hash_len; j++)
@@ -548,8 +563,87 @@
return (error);
}
+/*
+ * XXXgpf: for debugging purposes.
+ *
+ * Dump every node of the hardlink RB tree through dprintf(): the inode, its
+ * total link count, the number of links found so far, and the path of every
+ * file header recorded for that inode.
+ */
+static void
+pefs_rb_print(struct hardlink_head *hlc_headp)
+{
+	struct hardlink_counter *hlcp;
+	struct file_header *fhp;
+
+	dprintf(("\n+++Printing RB tree+++\n\n"));
+	RB_FOREACH(hlcp, hardlink_head, hlc_headp) {
+		/*
+		 * ino_t and uint32_t are unsigned, and ino_t may be wider than
+		 * int, so print them with unsigned conversions and explicit
+		 * casts rather than %d.
+		 */
+		dprintf(("inode %llu\ttotal links %u\tlinks found %u\n",
+		    (unsigned long long)hlcp->inode,
+		    (unsigned int)hlcp->total_links,
+		    (unsigned int)hlcp->links_found));
+		TAILQ_FOREACH(fhp, &(hlcp->file_headers), fh_hardlink_entries) {
+			dprintf(("\tpath: %s\n", fhp->path));
+		}
+	}
+}
+
+/*
+ * Walk the hardlink RB tree and, for every inode whose total link count is
+ * greater than the number of links found in the input list, emit a warning
+ * followed by one numbered line per path that was found for that inode.
+ */
+static void
+pefs_rb_warn(struct hardlink_head *hlc_headp)
+{
+	struct hardlink_counter *hlcp;
+	struct file_header *fhp;
+	int i;
+
+	RB_FOREACH(hlcp, hardlink_head, hlc_headp) {
+		if (hlcp->total_links > hlcp->links_found) {
+			/*
+			 * The counters are uint32_t and the key is an ino_t,
+			 * which may be wider than int: use unsigned
+			 * conversions with explicit casts instead of %d.
+			 */
+			pefs_warn("%u hard links of total %u were found in input list for file with inode: %llu",
+			    (unsigned int)hlcp->links_found,
+			    (unsigned int)hlcp->total_links,
+			    (unsigned long long)hlcp->inode);
+			i = 1;
+			TAILQ_FOREACH(fhp, &(hlcp->file_headers), fh_hardlink_entries) {
+				pefs_warn("link %d: %s", i++, fhp->path);
+			}
+		}
+	}
+}
+
+/*
+ * Record fhp as one of the hard links of the inode described by sbp.
+ *
+ * If the tree already holds a counter for sbp->st_ino, its links_found is
+ * incremented and fhp is appended to its list of file headers. Otherwise a
+ * new counter is allocated (total_links = sbp->st_nlink, links_found = 1),
+ * fhp is put on its list and the counter is inserted into the tree.
+ *
+ * Returns 0 on success, or PEFS_ERR_SYS if the new counter cannot be
+ * allocated.
+ */
+static int
+pefs_rb_insert(struct hardlink_head *hlc_headp, struct file_header *fhp, struct stat *sbp)
+{
+	struct hardlink_counter key, *node;
+
+	/* Only the inode field is read by the tree's comparator. */
+	key.inode = sbp->st_ino;
+	node = RB_FIND(hardlink_head, hlc_headp, &key);
+	if (node != NULL) {
+		/* Inode already known: one more link seen for it. */
+		node->links_found++;
+		TAILQ_INSERT_TAIL(&(node->file_headers), fhp, fh_hardlink_entries);
+		return (0);
+	}
+
+	/* First link seen for this inode: create its counter. */
+	node = malloc(sizeof(*node));
+	if (node == NULL) {
+		warn("memory allocation error");
+		return (PEFS_ERR_SYS);
+	}
+	node->inode = sbp->st_ino;
+	node->total_links = sbp->st_nlink;
+	node->links_found = 1;
+	TAILQ_INIT(&(node->file_headers));
+	TAILQ_INSERT_TAIL(&(node->file_headers), fhp, fh_hardlink_entries);
+	RB_INSERT(hardlink_head, hlc_headp, node);
+
+	return (0);
+}
+
static int
-pefs_file_semantic_checks(struct file_header *fhp, struct statfs *fsp)
+pefs_rb_cmp(struct hardlink_counter *hlcp1, struct hardlink_counter *hlcp2)
+{ /* comparator handed to RB_PROTOTYPE/RB_GENERATE for hardlink_head: orders nodes by inode number */
+ if (hlcp1->inode < hlcp2->inode)
+ return -1; /* hlcp1 sorts before hlcp2 */
+ else if (hlcp1->inode > hlcp2->inode)
+ return 1; /* hlcp1 sorts after hlcp2 */
+ else
+ return 0; /* equal inodes compare as the same key */
+}
+
+static int
+pefs_file_semantic_checks(struct file_header *fhp, struct statfs *fsp, struct hardlink_head *hlc_headp)
{
char parent_dir[MAXPATHLEN];
char sbuf[MAXPATHLEN];
@@ -647,6 +741,11 @@
fhp->path, fsp->f_mntonname);
return (PEFS_ERR_INVALID);
}
+
+ /* Keep all hardlink file headers in a rb tree */
+ if (sb.st_nlink > 1)
+ return (pefs_rb_insert(hlc_headp, fhp, &sb));
+
return (0);
}
@@ -687,16 +786,24 @@
* the checksum file.
* A) The total sum of entries is gathered so that the hash tables are allocated.
* B) For each file entry:
- * B1) semantic checks: file should reside in pefs filesystem &
- * file should be regular file
+ * B1) semantic checks:
+ * B1a) file should reside in pefs filesystem & file should be regular file.
+ * B1b) if symlink, acquire and save the absolute path of the symlink's
+ * target. Try to stat() the target but don't do anything else.
+ * B1c) If hardlink, save a reference to this file entry in our rb tree.
+ * rb-tree uses inodes as keys and is used in part C to print warnings.
* B2) the file_id is retrieved.
* B3) list of checksums is computed for the file's 4k blocks.
- * B4) file entry is added to fh_head
- * C) Cuckoo insertion:
+ * B4) file entry is added to universal fh_head.
+ * C) Print warnings for hardlinks if the number of links found in inputlist isn't
+ * equal to the number of total inode links.
+ * D) Cuckoo insertion:
* We try to populate our hash tables using the cuckoo algorithm. Should we fall
* into an infinite loop during insertion, we re-allocate larger hash tables
* and try again until we succeed. The possibility to fail twice in a row is
* 1.5% * 1.5% = 0.0225%
+ *
+ * XXXgpf: [TODO] more comments
*/
static int
pefs_create_in_memory_db(FILE *fpin, const EVP_MD *md, uint8_t hash_len,
@@ -704,6 +811,7 @@
{
struct statfs fs;
struct file_header_head fh_head;
+ struct hardlink_head hlc_head;
struct file_header *fhp;
int error;
uint32_t nfiles;
@@ -722,8 +830,9 @@
return (error);
TAILQ_INIT(&fh_head);
+ RB_INIT(&hlc_head);
while((fhp = pefs_next_file(fpin, &error)) != NULL) {
- error = pefs_file_semantic_checks(fhp, &fs);
+ error = pefs_file_semantic_checks(fhp, &fs, &hlc_head);
if (error != 0)
return (error);
@@ -742,6 +851,12 @@
if (error != 0)
return (error);
+ pefs_rb_print(&hlc_head);
+ pefs_rb_warn(&hlc_head);
+ /*
+ * XXXgpf: [TODO] print warnings for dem hardlinks
+ */
+
cuckoo_insert:
TAILQ_FOREACH(fhp, &fh_head, file_header_entries) {
error = pefs_add_to_hash_table(chtp, fhp);
@@ -1067,3 +1182,5 @@
return (error);
}
+
+RB_GENERATE(hardlink_head, hardlink_counter, hardlink_entries, pefs_rb_cmp);
Modified: soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c
==============================================================================
--- soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c Wed Jun 6 10:56:59 2012 (r237201)
+++ soc2012/gpf/pefs_kmod/sbin/pefs/pefs_ctl.c Wed Jun 6 11:03:05 2012 (r237202)
@@ -1032,6 +1032,12 @@
int error, i, j;
const char *algo;
+ /*
+ * XXXgpf: [TODO] Now, all input file entries are kept in a 'global' tail structure
+ * and insertion into hash table occurs after all of them are read/parsed. Therefore,
+ * it is possible to have fpin = stdin by default and not require an input file, since
+ * we will not have to go through the input list twice, thus requiring a rewind().
+ */
fpin = NULL;
/* by default use sha256 */
algo = supported_digests[0];
More information about the svn-soc-all
mailing list