socsvn commit: r240185 - in soc2012/tzabal/server-side: akcrs-handler akcrs-release/9.0.0/usr.sbin/crashreportd

Wed Aug 8 04:52:06 UTC 2012

Author: tzabal
Date: Wed Aug  8 04:52:04 2012
New Revision: 240185
URL: http://svnweb.FreeBSD.org/socsvn/?view=rev&rev=240185

Log:
  Implementation of the Recognize phase and complete reorganization of the crashreportd program. The single /usr/sbin/crashreportd module has been divided into several modules that live under the server-side/akcrs-handler directory.

Added:
  soc2012/tzabal/server-side/akcrs-handler/
  soc2012/tzabal/server-side/akcrs-handler/confirm_report.wsgi   (contents, props changed)
  soc2012/tzabal/server-side/akcrs-handler/crashreport.py
  soc2012/tzabal/server-side/akcrs-handler/database.py
  soc2012/tzabal/server-side/akcrs-handler/main.py
  soc2012/tzabal/server-side/akcrs-handler/settings.py
Modified:
  soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py

Added: soc2012/tzabal/server-side/akcrs-handler/confirm_report.wsgi
==============================================================================

--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2012/tzabal/server-side/akcrs-handler/confirm_report.wsgi	Wed Aug  8 04:52:04 2012	(r240185)
@@ -0,0 +1,49 @@
+import urlparse
+# Importing the cgi module leads to an error when accessing the web page
+#from cgi import escape
+
+import database
+
+def application(environ, start_response):
+    """WSGI entry point that confirms a crash report.
+
+    A GET request that carries the 'id' and 'code' query parameters marks the
+    matching unconfirmed report as confirmed. The reply is always '200 OK'
+    with a short message that describes the outcome.
+    """
+    response_body = 'Invalid confirmation code.'
+    db = database.Database()
+
+    if not db.connection:
+        # Without a connection there is no cursor, so no query can be run
+        response_body = 'Could not connect to database.'
+    elif environ['REQUEST_METHOD'] == 'GET':
+        parameters = urlparse.parse_qs(environ.get('QUERY_STRING', ''))
+
+        if 'id' in parameters and 'code' in parameters:
+            report_id = parameters['id'][0]
+            code = parameters['code'][0]
+            db.query = ('SELECT 1 '
+                        'FROM reports '
+                        'WHERE id = %s AND confirmation_code = %s AND '
+                        'confirmed = %s')
+            db.values = (report_id, code, False)
+
+            if not db.execute_query():
+                response_body = 'Could not execute the query.'
+            elif db.cursor.rowcount == 1:
+                db.query = 'UPDATE Reports SET confirmed = %s WHERE id = %s'
+                db.values = (True, report_id)
+                # Report success only when the update really went through
+                if not db.execute_query():
+                    response_body = 'Could not execute the query.'
+                else:
+                    db.save()
+                    response_body = ('Your report has been confirmed '
+                                     'succesfully.')
+    if db.connection:
+        db.cursor.close()
+        db.connection.close()
+
+    start_response('200 OK', [('Content-type', 'text/html')])
+    return [response_body]

Added: soc2012/tzabal/server-side/akcrs-handler/crashreport.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2012/tzabal/server-side/akcrs-handler/crashreport.py	Wed Aug  8 04:52:04 2012	(r240185)
@@ -0,0 +1,130 @@
+import logging
+import os
+import re
+import tarfile
+from StringIO import StringIO
+
+from lxml import etree
+
+class CrashReport(object):
+    """Represents a crash report, which is a gzip compressed tar archive."""
+
+    valid_name = re.compile(r'^crashreport\.[A-Za-z0-9]{6}\.tar\.gz$')
+
+    def __init__(self, path):
+        # name is the last component of path; data describes the single file
+        # that a valid report is expected to contain
+        self.name = os.path.basename(path)
+        self.path = path
+        self.confirmation_code = None
+        self.data = CrashData()
+
+
+    def has_valid_name(self):
+        """Returns True if the report's name matches the name of a valid crash
+        report. Otherwise it logs the name and returns None."""
+        match = re.match(self.__class__.valid_name, self.name)
+
+        if not match:
+            logging.info('Invalid crash report name: %s' % self.name)
+            return
+
+        return True
+
+
+    def has_valid_type(self):
+        """Returns True if the report can be opened as a gzip compressed tar
+        archive. Otherwise it logs the reason and returns None."""
+        if not tarfile.is_tarfile(self.path):
+            logging.info('The report %s cannot be read from the tarfile module'
+                         % self.path)
+            return
+        # The archive is closed in 'else', i.e. only if it was really opened
+        try:
+            tarfileobj = tarfile.open(self.path, 'r:gz')
+        except tarfile.ReadError:
+            logging.info('The provided mode is not suitable to open for reading'
+                         ' the report %s' % self.path)
+            return
+        except tarfile.CompressionError:
+            logging.info('The compression method for the report %s is not '
+                         'supported' % self.path)
+            return
+        else:
+            tarfileobj.close()
+
+        return True
+
+
+    def has_valid_contents_number(self):
+        """Returns True if the report contains exactly one file, and stores
+        the name of that file in self.data.name. Otherwise it returns None."""
+        try:
+            tarfileobj = tarfile.open(self.path, 'r:gz')
+        except (tarfile.ReadError, tarfile.CompressionError):
+            return
+
+        # The archive is open from here on, so it is always safe to close it
+        try:
+            contents_list = tarfileobj.getnames()
+            if len(contents_list) != 1:
+                logging.info('The report %s has invalid number of contents'
+                             % self.path)
+                return
+            self.data.name = contents_list[0]
+        finally:
+            tarfileobj.close()
+
+        return True
+
+
+
+class CrashData(object):
+    """This class represents the crash data that a crash report contains."""
+
+    valid_name = re.compile(r'^crashreport\.[A-Za-z0-9]{6}\.xml$')
+
+    def __init__(self):
+        self.name = None
+        self.path = None
+        self.info = {}
+        self.commands = {}
+
+
+    def has_valid_name(self):
+        """Returns True if the report's crash data name matches the name of a
+        valid crash data. Otherwise it logs the name and returns None."""
+        match = re.match(self.__class__.valid_name, self.name)
+
+        if not match:
+            logging.info('Invalid crash data name: %s' % self.name)
+            return
+
+        return True
+
+
+    def has_valid_crashdata(self):
+        """Returns True if the crash data is a well formed and valid XML file.
+        Otherwise it logs the reason and returns None."""
+        dtdfile = StringIO("""<!ELEMENT crashreport (header, body)>
+                           <!ELEMENT header (email)>
+                           <!ELEMENT email (#PCDATA)>
+                           <!ELEMENT body (command+)>
+                           <!ELEMENT command (name, result)>
+                           <!ELEMENT name (#PCDATA)>
+                           <!ELEMENT result (#PCDATA)>""")
+        # The data is untrusted: do not expand the entities that it declares
+        try:
+            parser = etree.XMLParser(resolve_entities=False)
+            elemtree = etree.parse(self.path, parser)
+        except Exception:
+            logging.info('%s is not a well formed crash report data.' %
+                         (self.path))
+            return
+
+        dtd = etree.DTD(dtdfile)
+        if not dtd.validate(elemtree):
+            logging.info('%s is not a valid crash report data.' %
+                         (self.path))
+            return
+        return True
\ No newline at end of file

Added: soc2012/tzabal/server-side/akcrs-handler/database.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2012/tzabal/server-side/akcrs-handler/database.py	Wed Aug  8 04:52:04 2012	(r240185)
@@ -0,0 +1,40 @@
+import logging
+import psycopg2
+
+import settings
+
+class Database:
+    """Thin wrapper around a psycopg2 connection and one of its cursors."""
+
+    def __init__(self):
+        self.cursor = self.query = self.values = None
+        try:
+            self.connection = psycopg2.connect(database=settings.DBNAME,
+                                               host=settings.DBHOST,
+                                               user=settings.DBUSER,
+                                               password=settings.DBPASS)
+            self.cursor = self.connection.cursor()
+        except Exception:
+            # Callers test 'connection' to find out whether connecting failed
+            self.connection = None
+            logging.error('Could not connect to the database')
+
+    def execute_query(self):
+        """Executes self.query with self.values. Returns True on success,
+        otherwise it rolls back the transaction and returns None."""
+        try:
+            self.cursor.execute(self.query, self.values or None)
+        except Exception, err:
+            logging.info('Could not execute the query: %s' % self.query)
+            logging.info(getattr(err, 'pgerror', None) or err)
+            # A failed statement aborts the transaction until it is rolled back
+            if self.connection:
+                self.connection.rollback()
+            return
+
+        self.query = self.values = None
+        return True
+
+    def save(self):
+        """Commits the current transaction."""
+        self.connection.commit()
\ No newline at end of file

Added: soc2012/tzabal/server-side/akcrs-handler/main.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2012/tzabal/server-side/akcrs-handler/main.py	Wed Aug  8 04:52:04 2012	(r240185)
@@ -0,0 +1,497 @@
+import difflib
+import hashlib
+import logging
+import os
+import random
+import shutil
+import smtplib
+import string
+import tarfile
+import time
+from email.mime.text import MIMEText
+
+from lxml import etree
+
+import crashreport
+import database
+import settings
+
+
+db = database.Database()
+
+
+def move_invalid_report(path):
+    """Moves the file at path to the invalid reports directory, if possible."""
+    if not os.path.isdir(settings.INVALID_REPORTS_DIR):
+        logging.error('Invalid reports directory does not exist')
+    elif os.path.isfile(path):
+        try:
+            shutil.move(path, settings.INVALID_REPORTS_DIR)
+        except (shutil.Error, EnvironmentError), err:
+            logging.error('Could not move %s: %s' % (path, err))
+
+
+def send_confirmation_email(report):
+    """Emails the confirmation link of the given report to its submitter.
+    Returns True on success, otherwise it logs the error and returns None."""
+    sender = settings.SENDER
+    receiver = report.data.info['email']
+
+    message = MIMEText(settings.TEXT % (report.id, report.confirmation_code))
+    message['From'] = sender
+    message['To'] = receiver
+    message['Subject'] = settings.SUBJECT
+
+    try:
+        smtpconn = smtplib.SMTP(settings.SMTPSERVER)
+        try:
+            smtpconn.sendmail(sender, receiver, message.as_string())
+        finally:
+            smtpconn.quit()
+    except (smtplib.SMTPException, EnvironmentError), err:
+        logging.info(err)
+        return
+
+    return True
+
+
+def generate_random_string(size):
+    """Generates and returns a random string of the specified size.
+
+    The characters are ASCII letters and digits, picked with the operating
+    system's random source because the string is used as a secret."""
+    chars = string.ascii_letters + string.digits
+    choice = random.SystemRandom().choice
+    return ''.join(choice(chars) for ch in range(size))
+
+
+def store_report(report):
+    """Stores the report, along with its bug and its submitter when they are
+    new, in the database. Returns True on success, otherwise None."""
+    # Bugs
+    if report.bug_id == -1:
+        db.query = ('INSERT INTO bugs (state, reported) '
+                    'VALUES (%s, %s) '
+                    'RETURNING id')
+        db.values = ('Open', 0)
+
+        if not db.execute_query():
+            return
+        # fetchone() returns a row, i.e. a tuple; keep only its id column
+        report.bug_id = db.cursor.fetchone()[0]
+        db.save()
+    elif isinstance(report.bug_id, list):
+        report.bug_id = -1
+
+    # Submitters
+    db.query = 'SELECT id FROM submitters WHERE email = %s'
+    db.values = (report.data.info['email'], )
+
+    if not db.execute_query():
+        return
+
+    if db.cursor.rowcount:
+        report.submitter_id = db.cursor.fetchone()[0]
+    else:
+        password = generate_random_string(8)
+        hashpass = hashlib.sha256(password).hexdigest()
+
+        db.query = ('INSERT INTO submitters (email, password) '
+                    'VALUES (%s, %s) '
+                    'RETURNING id')
+        db.values = (report.data.info['email'], hashpass)
+
+        if not db.execute_query():
+            return
+
+        report.submitter_id = db.cursor.fetchone()[0]
+        db.save()
+
+    # Reports
+    report.confirmation_code = generate_random_string(16)
+
+    db.query = """INSERT INTO reports (bug_id, submitter_id, confirmation_code,
+    crashtype, crashdate, hostname, ostype, osrelease, version, machine, panic,
+    backtrace, top_significant_func, rem_significant_funcs, ps_axl, vmstat_s,
+    vmstat_m, vmstat_z, vmstat_i, pstat_T, pstat_s, iostat, ipcs_a, ipcs_T,
+    nfsstat, netstat_s, netstat_m, netstat_id, netstat_anr, netstat_anA,
+    netstat_aL, fstat, dmesg, kernelconfig, ddbcapturebuffer)
+    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
+    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+    RETURNING id"""
+
+    db.values = (report.bug_id,
+                 report.submitter_id,
+                 report.confirmation_code,
+                 report.data.commands['crashtype'],
+                 report.data.commands['crashdate'],
+                 report.data.commands['hostname'],
+                 report.data.commands['ostype'],
+                 report.data.commands['osrelease'],
+                 report.data.commands['version'],
+                 report.data.commands['machine'],
+                 report.data.commands['panic'],
+                 report.data.commands['backtrace'],
+                 report.top_significant_func,
+                 report.rem_significant_funcs,
+                 report.data.commands['ps_axl'],
+                 report.data.commands['vmstat_s'],
+                 report.data.commands['vmstat_m'],
+                 report.data.commands['vmstat_z'],
+                 report.data.commands['vmstat_i'],
+                 report.data.commands['pstat_T'],
+                 report.data.commands['pstat_s'],
+                 report.data.commands['iostat'],
+                 report.data.commands['ipcs_a'],
+                 report.data.commands['ipcs_T'],
+                 report.data.commands['nfsstat'],
+                 report.data.commands['netstat_s'],
+                 report.data.commands['netstat_m'],
+                 report.data.commands['netstat_id'],
+                 report.data.commands['netstat_anr'],
+                 report.data.commands['netstat_anA'],
+                 report.data.commands['netstat_aL'],
+                 report.data.commands['fstat'],
+                 report.data.commands['dmesg'],
+                 report.data.commands['kernelconfig'],
+                 report.data.commands['ddbcapturebuffer'])
+
+    if not db.execute_query():
+        return
+
+    report.id = db.cursor.fetchone()[0]
+    db.save()
+
+    return True
+
+
+def uniqify_sequence(seq):
+    """Returns the distinct elements of the sequence seq, each of them exactly
+    once and in no particular order. (Technique taken from
+    http://www.peterbe.com/plog/uniqifiers-benchmark)"""
+    # A dictionary keeps one entry per key, which drops the duplicates
+    uniques = dict.fromkeys(seq, 1)
+    return uniques.keys()
+
+
+def allocate_values(length):
+    """Takes an integer as input and returns a list with length equals to that
+    integer plus one. The list contains values that are allocated in an
+    increment and symmetric way, and the first index of the list (i.e zero) is
+    not used (is None). The values are calculated using the length of the list
+    and the constant REMAINING_FRAMES_MAX_PERC. For a length that is not
+    positive there is nothing to allocate and [None] is returned."""
+    if length <= 0:
+        return [None]
+    # Average value
+    avg = settings.REMAINING_FRAMES_MAX_PERC / length
+    # Divide the list in 2 sets
+    elems_per_set = length // 2
+    # Initialize the list (need to access the middle element afterwards)
+    values = [None] * (length + 1)
+
+    # Handle even and odd lengths
+    if length % 2 == 0:
+        # Auxiliary variable for swapping between the two sets
+        aux = 1
+    else:
+        aux = 2
+        # If odd, then the average value to the middle element
+        values[elems_per_set + 1] = avg
+
+    # Increment and symmetric allocation of values around the average
+    diff = 1
+    for i in range(elems_per_set, 0, -1):
+        values[i] = avg + diff
+        values[i+aux] = avg - diff
+        aux += 2
+        diff += 1
+
+    return values
+
+
+def contains_any(str, set):
+    """Tells whether the string str holds at least one of the elements of the
+    iterable set. Returns True if it does, otherwise None."""
+    if any(elem in str for elem in set):
+        return True
+
+    # None, and not False, is the negative answer
+    return None
+
+
+def get_significant_funcs(backtrace):
+    """Takes a backtrace as a string and returns a list that contains only the
+    function names of the most significant stack frames. Stack frames that are
+    too short to contain a function name are ignored."""
+
+    # The function name is the second field of the first stack frame and the
+    # fourth field of every other stack frame
+    funcs = []
+    for index, stackframe in enumerate(backtrace.splitlines()):
+        fields = stackframe.split()
+        position = 3 if index else 1
+        if len(fields) > position:
+            funcs.append(fields[position])
+
+    # Starting from the last stack frame, skip the insignificant functions and
+    # then collect the function names up to the next insignificant function
+    significant_funcs = []
+    insignificant_funcs = ('syscall', 'panic', 'trap', 'lock', 'sleep', '??')
+    for func in reversed(funcs):
+        if not contains_any(func, insignificant_funcs):
+            significant_funcs.append(func)
+        elif significant_funcs:
+            break
+    significant_funcs.reverse()
+
+    return significant_funcs
+
+
+def recognize_report(report):
+    """Decides whether the report refers to an already known bug.
+
+    The report is compared with the confirmed reports of the database, and
+    report.bug_id is set to the id of the bug it is similar with, to a list of
+    bug ids if it is similar with more than one bugs, or to -1 otherwise.
+    Returns True on success, otherwise None."""
+    # Calculate the significant functions of the report
+    significant_funcs = get_significant_funcs(report.data.commands['backtrace'])
+    if not significant_funcs:
+        logging.info('No significant functions in %s' % report.path)
+        return
+    report.top_significant_func = significant_funcs[0]
+    report.rem_significant_funcs = significant_funcs[1:]
+
+    # Retrieve from the database the confirmed reports
+    db.query = ('SELECT bug_id, panic, top_significant_func, rem_significant_funcs '
+                'FROM Reports '
+                'WHERE confirmed = true')
+    if not db.execute_query():
+        return
+    loggedreports = db.cursor.fetchall()
+
+    # A list of [bug_id, percentage of similarity] pairs, one for every report
+    # retrieved from the database
+    sims = []
+
+    # Check the examined report against all the retrieved reports
+    for loggedreport in loggedreports:
+        # Calculate the percentage of similarity between the panic messages
+        ratio = difflib.SequenceMatcher(None,
+                                        report.data.commands['panic'],
+                                        loggedreport[1]
+                                        ).ratio()
+        perc = settings.PANIC_MESSAGE_MAX_PERC * ratio
+
+        # Calculate the percentage of similarity between the top significant
+        # function names
+        ratio = difflib.SequenceMatcher(None,
+                                        report.top_significant_func,
+                                        loggedreport[2]
+                                        ).ratio()
+        perc += settings.TOP_FRAME_MAX_PERC * ratio
+
+        # Compare X remaining significant function names, where X is the length
+        # of the report with the fewest of them. Each one is weighted with an
+        # increment and symmetric allocation of maximum percentages, whose
+        # index zero is not used. Nothing is allocated when X is zero.
+        length = min(len(report.rem_significant_funcs), len(loggedreport[3]))
+        if length:
+            rem_sig_max_percs = allocate_values(length)
+        for i in range(length):
+            ratio = difflib.SequenceMatcher(None,
+                                            report.rem_significant_funcs[i],
+                                            loggedreport[3][i]
+                                            ).ratio()
+            perc += rem_sig_max_percs[i + 1] * ratio
+        sims.append([loggedreport[0], perc])
+
+    # Find with which reports the examined report is similar based on the value
+    # of the limit percentage
+    passlimit = [sim[0] for sim in sims
+                 if sim[1] >= settings.LIMIT_PERC]
+
+    # Finally, check if the examined report concluded to refer to none or only
+    # one logged bug. If it refers to more than one bugs, then this is an
+    # indication that our algorithm is not accurate.
+    report.bug_id = -1
+    if len(passlimit):
+        if passlimit.count(passlimit[0]) == len(passlimit):
+            # Refers to a known bug
+            report.bug_id = passlimit[0]
+        else:
+            # Refers to more than one known bugs
+            report.bug_id = uniqify_sequence(passlimit)
+
+    return True
+
+
+def parse_crashdata(report):
+    """Parses the crash data XML file of the given report and stores the data
+    in report.data. Returns True on success, otherwise None."""
+    validnames = ['crashtype', 'crashdate', 'hostname', 'ostype', 'osrelease',
+                  'version', 'machine', 'panic', 'backtrace', 'ps_axl',
+                  'vmstat_s', 'vmstat_m', 'vmstat_z', 'vmstat_i', 'pstat_T',
+                  'pstat_s', 'iostat', 'ipcs_a', 'ipcs_T', 'nfsstat',
+                  'netstat_s', 'netstat_m', 'netstat_id', 'netstat_anr',
+                  'netstat_anA', 'netstat_aL', 'fstat', 'dmesg', 'kernelconfig',
+                  'ddbcapturebuffer']
+
+    if not os.path.isfile(report.data.path):
+        logging.info('Crash report data %s is not an existing regular file'
+                     % report.data.path)
+        return
+
+    # Untrusted data: no entity expansion. An empty element has text None.
+    parser = etree.XMLParser(resolve_entities=False)
+    root = etree.parse(report.data.path, parser).getroot()
+    report.data.info['email'] = (root[0][0].text or '').strip()
+    for elem in root.iter('command'):
+        name = (elem[0].text or '').strip()
+        if name in validnames:
+            report.data.commands[name] = (elem[1].text or '').strip()
+    # The phases that follow read every one of the valid commands
+    for name in validnames:
+        if name not in report.data.commands:
+            logging.info('Crash report data lacks the command %s' % name)
+            return
+    return True
+
+
+def discard_report(path):
+    """Removes the crash report file at path from the file system."""
+    os.remove(path)
+
+
+def clear_directory(directory):
+    """Removes every file (but no directory) that lies directly inside the
+    directory with the given absolute path."""
+    entrypaths = [directory + '/' + entry for entry in os.listdir(directory)]
+    for entrypath in entrypaths:
+        if os.path.isfile(entrypath):
+            os.remove(entrypath)
+
+
+def extract_report(report):
+    """Extracts the given report to the auxiliary directory and stores the path
+    of the extracted crash data in report.data.path. Returns True on success,
+    otherwise None."""
+    if not os.path.isdir(settings.AUXILIARY_DIR):
+        logging.error('Auxiliary directory does not exist')
+        return
+
+    clear_directory(settings.AUXILIARY_DIR)
+
+    try:
+        tarfileobj = tarfile.open(report.path, 'r:gz')
+        try:
+            tarfileobj.extractall(settings.AUXILIARY_DIR)
+        finally:
+            tarfileobj.close()
+    except (tarfile.ReadError, tarfile.CompressionError):
+        return
+
+    report.data.path = settings.AUXILIARY_DIR + '/' + report.data.name
+    return True
+
+
+def check_report(report):
+    """Checks a crash report for validity and security.
+
+    Runs, one after the other, the validity checks that the CrashReport and
+    the CrashData objects provide, and stops at the first check that fails.
+    The order is strict because some checks assign values to the instance
+    variables of the given object and later checks depend on those values.
+    This avoids executing the same code multiple times and keeps the checks
+    distinct and organized.
+
+    Returns True if every check passes, otherwise None.
+    """
+    checks = (
+        report.has_valid_name,
+        report.has_valid_type,
+        # Assigns report.data.name
+        report.has_valid_contents_number,
+        report.data.has_valid_name,
+        # Assigns report.data.path
+        lambda: extract_report(report),
+        report.data.has_valid_crashdata,
+    )
+
+    # Each check is called only if all the previous ones passed
+    for check in checks:
+        if not check():
+            return
+
+    return True
+
+
+def create_pid_file():
+    """Creates the Process ID file that contains the PID of crashreportd.
+
+    It is used from the rc.d script to stop the program normally. Returns True
+    on success, otherwise it logs the error and returns None."""
+    try:
+        pidfile = open(settings.PID_FILE, 'w')
+        try:
+            pidfile.write(str(os.getpid()))
+        finally:
+            pidfile.close()
+    except IOError:
+        logging.error('Could not create the Process ID file')
+        return
+
+    return True
+
+
+def start_logging():
+    """Turns on the logging facility if settings.LOGGING_FILE is set."""
+    if settings.LOGGING_FILE:
+        logging.basicConfig(level=logging.DEBUG, filename=settings.LOGGING_FILE,
+                            format='%(asctime)s in %(funcName)s() at '
+                            '%(lineno)s %(levelname)s: %(message)s',
+                            datefmt='%Y-%m-%d %H:%M:%S')
+
+
+def main():
+    """Runs the daemon that handles the arriving crash reports.
+
+    Every file of the crash reports directory passes through the phases below
+    in order. A report that fails a phase is moved to the invalid reports
+    directory, and a report that passes all of them is discarded. The
+    directory is scanned again after INTERVAL_TIME seconds, endlessly.
+    """
+    phases = (
+        check_report,
+        parse_crashdata,
+        recognize_report,
+        store_report,
+        send_confirmation_email,
+    )
+
+    start_logging()
+
+    # Nothing can be done without the Process ID file or the database
+    if not create_pid_file() or not db.connection:
+        return
+
+    while True:
+        for filename in os.listdir(settings.CRASHREPORTS_DIR):
+            path = settings.CRASHREPORTS_DIR + '/' + filename
+            report = crashreport.CrashReport(path)
+
+            for phase in phases:
+                if not phase(report):
+                    move_invalid_report(report.path)
+                    break
+            else:
+                # No phase failed, so the report is not needed any more
+                discard_report(report.path)
+
+        # Wait before scanning the directory again
+        time.sleep(settings.INTERVAL_TIME)
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

Added: soc2012/tzabal/server-side/akcrs-handler/settings.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ soc2012/tzabal/server-side/akcrs-handler/settings.py	Wed Aug  8 04:52:04 2012	(r240185)
@@ -0,0 +1,63 @@
+# Interval time, in seconds, between two scans of the crashreports directory
+INTERVAL_TIME = 10
+
+# Process ID file
+PID_FILE = '/var/run/crashreportd.pid'
+
+# Crashreports directory (where the arriving crash reports are looked for)
+CRASHREPORTS_DIR = '/var/spool/crashreports'
+
+# Auxiliary directory (where a crash report is extracted)
+AUXILIARY_DIR = '/tmp/crashreports'
+
+# Invalid crash reports directory
+INVALID_REPORTS_DIR = AUXILIARY_DIR + '/invalidreports'
+
+# Logging file (a false value leaves the logging facility unconfigured)
+LOGGING_FILE = '/home/tzabal/crashreportd.log'
+
+# Database name
+DBNAME = 'akcrsdb'
+
+# Database host
+DBHOST = '127.0.0.1'
+
+# Database user
+DBUSER = 'akcrs'
+
+# Database user password. NOTE(review): credential stored in clear text
+DBPASS = 'freebsd'
+
+# SMTP Server
+SMTPSERVER = 'smtp.hol.gr'
+
+# Email address and name of the sender
+SENDER = 'Automated Kernel Crash Reporting System <akcrs at freebsd.org>'
+
+# Confirmation email subject
+SUBJECT = 'Confirm your kernel crash report'
+
+# Confirmation email text (filled with the report id and confirmation code)
+TEXT = """\
+Hello,
+
+Please confirm your kernel crash report by clicking the following link:
+http://akcrs.dyndns.org/confirm_report?id=%s&code=%s
+
+Once you confirm, your kernel crash report will be stored in our database as
+valid.
+
+Thank you for your time.
+"""
+
+# Panic message maximum percentage
+PANIC_MESSAGE_MAX_PERC = 25
+
+# Top significant frame maximum percentage
+TOP_FRAME_MAX_PERC = 25
+
+# Remaining significant frames maximum percentage
+REMAINING_FRAMES_MAX_PERC = 50.0
+
+# Limit percentage for similar reports
+LIMIT_PERC = 60
\ No newline at end of file

Modified: soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py
==============================================================================
--- soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py	Wed Aug  8 04:42:24 2012	(r240184)
+++ soc2012/tzabal/server-side/akcrs-release/9.0.0/usr.sbin/crashreportd/crashreportd.py	Wed Aug  8 04:52:04 2012	(r240185)
@@ -151,20 +151,27 @@
 
 
 def send_confirmation_email(report):
-    sender = 'Automated Kernel Crash Reporting System <akcrs at freebsd.org>'
-    #receiver = report.data.info['email']
-    receiver = 'invalid at it.teithe.gr'
-    subject = 'Kernel Crash Report Confirmation'
-    text = 'Confirm your kernel crash report by clicking here.'
     smtpserver = 'smtp.hol.gr'
+    sender = 'Automated Kernel Crash Reporting System <akcrs at freebsd.org>'
+    receiver = report.data.info['email']
+    subject = 'Confirm your kernel crash report'
+    text = """\
+    Hello,
+    
+    Please confirm your kernel crash report by clicking the following link:
+    http://akcrs.dyndns.org/confirm_report?code=%s
+    
+    Once you confirm, your kernel crash report will be stored in our database
+    as valid.
+    
+    Thank you for your time.
+    """ % (report.confirmation_code)
     
     message = MIMEText(text)
     message['From'] = sender
     message['To'] = receiver
     message['Subject'] = subject
     
-    #print message
-    
     try:
         smtpconn = smtplib.SMTP(smtpserver)
         smtpconn.sendmail(sender, receiver, message.as_string())
@@ -191,28 +198,25 @@
     elemtree = etree.parse(report.data.path)
     root = elemtree.getroot()
     
-    report.data.info['email'] = re.sub(r'\s', '', root[0][0].text)
+    report.data.info['email'] = root[0][0].text.strip()
     
     for elem in elemtree.iter():
         if elem.tag == 'command':
             children = list(elem)
-            #print 'children[0].text: %s' % (children[0].text)
-            name = re.sub(r'\s', '', children[0].text)
-            result = children[1].text
+            name = children[0].text.strip()
+            result = children[1].text.strip()
             if name in validnames:
-                #print 'name: %s' % (name)
                 report.data.commands[name] = result
     
     return True
 
 
-def generate_password():
-    """Generates and returns a random password.
+def generate_random_string(size):
+    """Generates and returns a random string of the specified size.
     
-    Password is 8 characters in length and it may contain digits, lowercase and
-    uppercase letters.
+    The string is a sequence of characters that are chosen randomly from a set
+    that contains digits, lowercase and uppercase letters.
     """
-    size = 8
     chars = string.letters + string.digits
     return ''.join(random.choice(chars) for ch in range(size))
 
@@ -228,10 +232,10 @@
     
     if _curs.rowcount:
         submitter_id = _curs.fetchone()
-        print 'Submitter_id: %s' % (submitter_id)
+        #print 'Submitter_id: %s' % (submitter_id)
     else:
-        password = generate_password()
-        print 'password: %s' % (password)
+        password = generate_random_string(8)
+        #print 'password: %s' % (password)
         hashobj = hashlib.sha256()
         hashobj.update(password)
         hashpass = hashobj.hexdigest()
@@ -252,16 +256,21 @@
     # with previously logged reports
     bug_id = -1
     
-    query = """INSERT INTO Reports (bug_id, submitter_id, crashtype, crashdate,
-    hostname, ostype, osrelease, version, machine, panic, backtrace, ps_axl,
-    vmstat_s, vmstat_m, vmstat_z, vmstat_i, pstat_T, pstat_s, iostat, ipcs_a,
-    ipcs_T, nfsstat, netstat_s, netstat_m, netstat_id, netstat_anr,
-    netstat_anA, netstat_aL, fstat, dmesg, kernelconfig, ddbcapturebuffer)
-    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
-    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
+    confirmation_code = generate_random_string(12)
+    report.confirmation_code = confirmation_code
+    
+    query = """INSERT INTO Reports (bug_id, submitter_id, confirmation_code,
+    crashtype, crashdate, hostname, ostype, osrelease, version, machine, panic,
+    backtrace, ps_axl, vmstat_s, vmstat_m, vmstat_z, vmstat_i, pstat_T,
+    pstat_s, iostat, ipcs_a, ipcs_T, nfsstat, netstat_s, netstat_m, netstat_id,
+    netstat_anr, netstat_anA, netstat_aL, fstat, dmesg, kernelconfig,
+    ddbcapturebuffer) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
+    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
+    %s, %s)"""
     
     values = (bug_id,
               submitter_id,
+              confirmation_code,
               report.data.commands['crashtype'],
               report.data.commands['crashdate'],
               report.data.commands['hostname'],
@@ -389,7 +398,6 @@
     try:
         pidfile = open(_pid_file, 'w')
         pidfile.write(str(pid))
-        pidfile.close()
     except IOError:
         return
     finally: