svn commit: r226136 - in projects/diffused_head/sys:
modules/diffuse modules/diffuse/diffuse_classifier_c45 netinet/ipfw
Lawrence Stewart
lstewart at FreeBSD.org
Sat Oct 8 05:28:18 UTC 2011
Author: lstewart
Date: Sat Oct 8 05:28:17 2011
New Revision: 226136
URL: http://svn.freebsd.org/changeset/base/226136
Log:
A C4.5 decision tree classifier implementation in the form of a DIFFUSE
classifier kernel module. This type of classifier has been empirically shown to
perform the fastest and highest accuracy classifications compared to a set of
common classification algorithms.
Sponsored by: FreeBSD Foundation
Reviewed by: bz
Added:
projects/diffused_head/sys/modules/diffuse/diffuse_classifier_c45/
projects/diffused_head/sys/modules/diffuse/diffuse_classifier_c45/Makefile (contents, props changed)
projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_c45.c (contents, props changed)
projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_c45.h (contents, props changed)
Modified:
projects/diffused_head/sys/modules/diffuse/Makefile
Modified: projects/diffused_head/sys/modules/diffuse/Makefile
==============================================================================
--- projects/diffused_head/sys/modules/diffuse/Makefile Sat Oct 8 05:15:13 2011 (r226135)
+++ projects/diffused_head/sys/modules/diffuse/Makefile Sat Oct 8 05:28:17 2011 (r226136)
@@ -1,6 +1,7 @@
# $FreeBSD$
SUBDIR= diffuse \
+ diffuse_classifier_c45 \
diffuse_feature_iat \
diffuse_feature_iatbd \
diffuse_feature_pcnt \
Added: projects/diffused_head/sys/modules/diffuse/diffuse_classifier_c45/Makefile
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/diffused_head/sys/modules/diffuse/diffuse_classifier_c45/Makefile Sat Oct 8 05:28:17 2011 (r226136)
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+# The module's source lives in sys/netinet/ipfw, three directories up from here.
+.PATH: ${.CURDIR}/../../../netinet/ipfw
+
+# Build one kernel module from the single C4.5 classifier source file.
+KMOD= diffuse_classifier_c45
+SRCS= diffuse_classifier_c45.c
+
+.include <bsd.kmod.mk>
Added: projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_c45.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_c45.c Sat Oct 8 05:28:17 2011 (r226136)
@@ -0,0 +1,237 @@
+/*-
+ * Copyright (c) 2010-2011
+ * Swinburne University of Technology, Melbourne, Australia.
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Sebastian Zander, made
+ * possible in part by a gift from The Cisco University Research Program Fund, a
+ * corporate advised fund of Silicon Valley Community Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * DIFFUSE C4.5 classifier.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#endif /* _KERNEL */
+#include <sys/socket.h>
+
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip_diffuse.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+
+#include <netinet/ipfw/diffuse_common.h>
+#include <netinet/ipfw/diffuse_classifier.h>
+#include <netinet/ipfw/diffuse_classifier_c45.h>
+#ifdef _KERNEL
+#include <netinet/ipfw/diffuse_classifier_module.h>
+#include <netinet/ipfw/diffuse_private.h>
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * Return the number of bytes needed to hold the C4.5 configuration 'c': the
+ * fixed-size header plus the c->tree_len bytes of node data that follow it.
+ */
+static int
+get_size(struct di_classifier_c45_config *c)
+{
+	size_t total;
+
+	total = sizeof(*c);
+	total += c->tree_len;
+
+	return ((int)total);
+}
+
+/*
+ * Copy the C4.5 configuration 'f' (header fields plus f->tree_len bytes of
+ * node data) into 't'.
+ *
+ * The caller must have allocated 't' with room for the fixed header and
+ * f->tree_len bytes of nodes; the destination size is not checked here.
+ */
+static void
+cpy_conf(struct di_classifier_c45_config *f, struct di_classifier_c45_config *t)
+{
+
+	t->oid = f->oid;
+	/*
+	 * Both name buffers have the same fixed size, so copy the whole array
+	 * instead of trusting the source to be NUL-terminated (strcpy() would
+	 * run past the buffer otherwise), then force termination.
+	 */
+	memcpy(t->model_name, f->model_name, sizeof(t->model_name));
+	t->model_name[sizeof(t->model_name) - 1] = '\0';
+	t->feature_cnt = f->feature_cnt;
+	t->class_cnt = f->class_cnt;
+	t->multi = f->multi;
+	t->tree_len = f->tree_len;
+	memcpy(t->nodes, f->nodes, f->tree_len);
+}
+
+/*
+ * Create the per-instance state for a C4.5 classifier.
+ *
+ * 'params' is interpreted as a struct di_classifier_c45_config. A private
+ * copy of it, including the tree nodes, is allocated and stored in
+ * cdata->conf.
+ *
+ * Returns 0 on success, or ENOMEM if the copy cannot be allocated (in which
+ * case cdata->conf is NULL).
+ */
+static int
+c45_init_instance(struct di_cdata *cdata, struct di_oid *params)
+{
+	struct di_classifier_c45_config *src;
+	int error;
+
+	src = (struct di_classifier_c45_config *)params;
+	error = 0;
+
+	DID("class cnt %d", src->class_cnt);
+	DID("attr cnt %d", src->feature_cnt);
+	DID("multi %d", (1 << src->multi));
+	DID("tree_len %d", src->tree_len);
+	DID("want size %d", get_size(src));
+
+	/* M_NOWAIT is passed, so a NULL result has to be handled. */
+	cdata->conf = malloc(get_size(src), M_DIFFUSE, M_NOWAIT | M_ZERO);
+	if (cdata->conf != NULL)
+		cpy_conf(src, (struct di_classifier_c45_config *)cdata->conf);
+	else
+		error = ENOMEM;
+
+	return (error);
+}
+
+/*
+ * Release the configuration copy held by a classifier instance.
+ *
+ * Frees cdata->conf and clears the pointer so that a stale reference cannot
+ * be used or freed a second time. Always returns 0.
+ */
+static int
+c45_destroy_instance(struct di_cdata *cdata)
+{
+
+	free(cdata->conf, M_DIFFUSE);
+	cdata->conf = NULL;
+
+	return (0);
+}
+
+/*
+ * Report the size of this instance's configuration and, unless size_only is
+ * non-zero, copy the configuration into cbuf.
+ *
+ * cbuf is treated as a struct di_classifier_c45_config and has to be large
+ * enough for the returned size; no bounds are checked here.
+ *
+ * Returns the configuration size in bytes (fixed header plus tree).
+ */
+static int
+c45_get_conf(struct di_cdata *cdata, struct di_oid *cbuf, int size_only)
+{
+	struct di_classifier_c45_config *conf;
+
+	conf = (struct di_classifier_c45_config *)cdata->conf;
+	if (size_only == 0)
+		cpy_conf(conf, (struct di_classifier_c45_config *)cbuf);
+
+	return (get_size(conf));
+}
+
+#endif
+
+/*
+ * Classify a feature vector by walking the C4.5 decision tree stored in the
+ * instance configuration.
+ *
+ * cdata    - classifier instance; cdata->conf holds the
+ *            struct di_classifier_c45_config containing the tree.
+ * features - array of fcnt feature values.
+ * fcnt     - number of entries in features.
+ *
+ * Starting at node 0, the feature selected by each node is scaled by
+ * 2^multi and compared with the node's split value to pick the le_ or gt_
+ * branch. A branch flagged DI_C45_CLASS ends the walk and its id is returned
+ * as the class; otherwise the id is the index of the next node.
+ *
+ * Returns the class id, or -1 if the tree cannot be evaluated: feature index
+ * out of range, unsupported or unknown node type, node index outside the
+ * tree, a cycle in the tree, or a multiplier too large to apply.
+ */
+int
+c45_classify(struct di_cdata *cdata, int32_t *features, int fcnt)
+{
+	struct di_classifier_c45_config *conf;
+	struct di_c45_node_real *nodes, *node;
+	int64_t fval;
+	size_t n, node_cnt, steps;
+	int take_le;
+
+	conf = (struct di_classifier_c45_config *)cdata->conf;
+	nodes = (struct di_c45_node_real *)conf->nodes;
+	node_cnt = conf->tree_len / sizeof(struct di_c45_node_real);
+
+#ifdef DIFFUSE_DEBUG2
+	printf("DIFFUSE: %-10s features ", __func__);
+	for (int i = 0; i < fcnt; i++)
+		printf("%u ", features[i]);
+	printf("\n");
+#endif
+
+	/*
+	 * Features are 32 bits wide, so a shift of at most 31 cannot overflow
+	 * the 64-bit scaled value computed below. Larger shifts are rejected.
+	 */
+	if (conf->multi > 31)
+		return (-1);
+
+	/*
+	 * A well-formed tree never visits a node twice, so node_cnt steps
+	 * bound the walk and keep a malformed (cyclic) tree from spinning.
+	 */
+	n = 0;
+	for (steps = 0; steps < node_cnt && n < node_cnt; steps++) {
+		node = &nodes[n];
+		if (node->nid.feature >= fcnt)
+			return (-1); /* Should never happen. */
+
+		/* Scale in signed 64-bit arithmetic to match node->val. */
+		fval = (int64_t)features[node->nid.feature] *
+		    ((int64_t)1 << conf->multi);
+
+		switch (node->nid.type) {
+		case DI_C45_REAL:
+			take_le = (fval <= node->val);
+			break;
+
+		case DI_C45_BNOM:
+			/* Binary nominal: le_ is the "equal" branch. */
+			take_le = (fval == node->val);
+			break;
+
+		case DI_C45_NOM:
+			/* XXX: Not supported yet. */
+			return (-1);
+
+		default:
+			/* Unknown node type: fail instead of looping forever. */
+			return (-1);
+		}
+
+		if (take_le) {
+			if (node->le_type & DI_C45_CLASS)
+				return (node->le_id);
+			n = node->le_id;
+		} else {
+			if (node->gt_type & DI_C45_CLASS)
+				return (node->gt_id);
+			n = node->gt_id;
+		}
+	}
+
+	return (-1);
+}
+
+#ifdef _KERNEL
+
+/* Return the feature_cnt field of the instance's C4.5 configuration. */
+static int
+c45_get_feature_cnt(struct di_cdata *cdata)
+{
+	struct di_classifier_c45_config *conf;
+
+	conf = (struct di_classifier_c45_config *)cdata->conf;
+
+	return (conf->feature_cnt);
+}
+
+/* Return the class_cnt field of the instance's C4.5 configuration. */
+static int
+c45_get_class_cnt(struct di_cdata *cdata)
+{
+	struct di_classifier_c45_config *conf;
+
+	conf = (struct di_classifier_c45_config *)cdata->conf;
+
+	return (conf->class_cnt);
+}
+
+/*
+ * Method table for the C4.5 classifier algorithm, named "c4.5". It plugs the
+ * functions defined in this file into the di_classifier_alg hooks.
+ */
+static struct di_classifier_alg di_c45_desc = {
+ _FI( .name = ) "c4.5",
+ _FI( .ref_count = ) 0,
+
+ _FI( .init_instance = ) c45_init_instance,
+ _FI( .destroy_instance = ) c45_destroy_instance,
+ _FI( .get_conf = ) c45_get_conf,
+ _FI( .classify = ) c45_classify,
+ _FI( .get_feature_cnt = ) c45_get_feature_cnt,
+ _FI( .get_class_cnt = ) c45_get_class_cnt,
+};
+
+/*
+ * NOTE(review): presumably registers di_c45_desc with DIFFUSE as the "c45"
+ * classifier module -- confirm against diffuse_classifier_module.h.
+ */
+DECLARE_DIFFUSE_CLASSIFIER_MODULE(c45, &di_c45_desc);
+
+#endif
Added: projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_c45.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ projects/diffused_head/sys/netinet/ipfw/diffuse_classifier_c45.h Sat Oct 8 05:28:17 2011 (r226136)
@@ -0,0 +1,115 @@
+/*-
+ * Copyright (c) 2010-2011
+ * Swinburne University of Technology, Melbourne, Australia.
+ * All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Sebastian Zander, made
+ * possible in part by a gift from The Cisco University Research Program Fund, a
+ * corporate advised fund of Silicon Valley Community Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * DIFFUSE C4.5 classifier.
+ */
+
+#ifndef _NETINET_IPFW_DIFFUSE_CLASSIFIER_C45_H_
+#define _NETINET_IPFW_DIFFUSE_CLASSIFIER_C45_H_
+
+/*
+ * Bit flags for struct di_c45_node_real gt_type/le_type fields.
+ * c45_classify() only tests DI_C45_CLASS: when it is set the matching id is
+ * returned as the class, otherwise the id is used as the next node index.
+ */
+#define DI_C45_CLASS 0x01
+#define DI_C45_NODE 0x02
+#define DI_C45_FEAT 0x04
+
+/*
+ * Values for struct di_c45_node type field.
+ * c45_classify() evaluates DI_C45_BNOM nodes with an equality test and
+ * DI_C45_REAL nodes with a less-or-equal test; DI_C45_NOM is not supported.
+ */
+#define DI_C45_BNOM 1
+#define DI_C45_NOM 2
+#define DI_C45_REAL 3
+
+/* NOTE: Number of classes, features limited to 256. */
+
+/*
+ * Header shared by the node layouts below, which embed it as their first
+ * member 'nid'.
+ */
+struct di_c45_node {
+ /* Node type (split node vs leaf), value type (nominal vs real). */
+ uint16_t type;
+ /* Feature number (index into the feature array being classified). */
+ uint8_t feature;
+ /* Class if feature missing. */
+ uint8_t missing_class;
+};
+
+/*
+ * Split node comparing a feature against 'val'. c45_classify() accesses every
+ * node of the tree through this layout.
+ */
+struct di_c45_node_real {
+ struct di_c45_node nid;
+
+ /* Split value, compared against the feature scaled by 2^multi. */
+ int64_t val;
+ /*
+ * le = less-equal branch (class or node depending on le_type).
+ * gt = greater-than branch (class or node depending on gt_type).
+ */
+ uint16_t le_id;
+ uint16_t gt_id;
+ uint8_t le_type;
+ uint8_t gt_type;
+};
+
+/*
+ * Binary nominal split node. The members have the same types and order as
+ * struct di_c45_node_real, with eq_* in place of le_* and ne_* in place of
+ * gt_*.
+ */
+struct di_c45_node_bin_nominal {
+ struct di_c45_node nid;
+
+ /* Value the scaled feature is tested against for equality. */
+ int64_t val;
+ /* eq = equal class (class or node depending on type). */
+ uint16_t eq_id;
+ /* ne = non-equal class (class or node depending on type). */
+ uint16_t ne_id;
+ uint8_t eq_type;
+ uint8_t ne_type;
+};
+
+/* XXX: No support for non-binary nominal yet. */
+
+/*
+ * C4.5 classifier configuration: a fixed header followed by tree_len bytes of
+ * tree nodes.
+ */
+struct di_classifier_c45_config
+{
+ struct di_oid oid;
+
+ char model_name[DI_MAX_MODEL_STR_LEN];
+ /* Number of features. */
+ uint16_t feature_cnt;
+ /* Number of classes. */
+ uint16_t class_cnt;
+ /*
+  * Precision, multiplier for double->int. Used as a shift count:
+  * c45_classify() multiplies each feature by (1 << multi).
+  */
+ uint16_t multi;
+ /* Length of nodes, in bytes. */
+ uint16_t tree_len;
+ /*
+  * Tree. Declared with the common node header type, but c45_classify()
+  * reads it as an array of struct di_c45_node_real.
+  */
+ struct di_c45_node nodes[];
+};
+
+/*
+ * NOTE(review): no explicit definition appears in diffuse_classifier_c45.c;
+ * presumably provided by DECLARE_DIFFUSE_CLASSIFIER_MODULE(c45, ...) -- confirm.
+ */
+struct di_classifier_module * c45_module(void);
+
+/* Forward declaration; only a pointer to it is needed below. */
+struct di_cdata;
+
+/*
+ * Walk the instance's decision tree for the fcnt values in 'features'.
+ * Returns the resulting class id, or -1 if no class could be determined.
+ */
+int c45_classify(struct di_cdata *cdata, int32_t *features, int fcnt);
+
+#endif /* _NETINET_IPFW_DIFFUSE_CLASSIFIER_C45_H_ */
More information about the svn-src-projects
mailing list