svn commit: r246971 - user/bugmeister/gnats/tools

Mark Linimon linimon at FreeBSD.org
Tue Feb 19 02:37:42 UTC 2013


Author: linimon (doc,ports committer)
Date: Tue Feb 19 02:37:41 2013
New Revision: 246971
URL: http://svnweb.freebsd.org/changeset/base/246971

Log:
  Ad-hoc script by linimon to rank the goodness of incoming raw data.

Added:
  user/bugmeister/gnats/tools/abuseHandler.py   (contents, props changed)

Added: user/bugmeister/gnats/tools/abuseHandler.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/bugmeister/gnats/tools/abuseHandler.py	Tue Feb 19 02:37:41 2013	(r246971)
@@ -0,0 +1,189 @@
+"""Go through a set of submitted PRs in a directory and rename
+   them for manual handling if they meet certain spammy criteria.
+   Original author: Mark Linimon, Lonesome Dove Computing Services,
+   www.lonesome.com.  License: BSD license.
+   $FreeBSD$
+"""
+
+__version__ = "0.02"
+
+__all__ = ["abuseHandler"]
+
+import os
+import string
+import sys
+
+QUEUE_DIR = "/home/gnats/gnats-queue"
+
+HEADER_SYNOPSIS  = ">Synopsis:"
+HEADER_ORGINATOR = ">Orginator:"
+HEADER_RELEASE   = ">Release:"
+
+INTERESTING_HEADERS = ( \
+    HEADER_SYNOPSIS, \
+    HEADER_ORGINATOR, \
+    HEADER_RELEASE
+)
+
+ABUSED_HEADERS = ( \
+    HEADER_SYNOPSIS, \
+    HEADER_ORGINATOR, \
+    HEADER_RELEASE
+)
+
+ABUSIVE_ORIGINATORS = ( \
+    "Perry Keller", \
+)
+
+IGNORE_FILE_PREFIX = "."
+
+LINELEN = 1024        # try to prevent buffer overruns when reading files
+
+DEBUG_LINE         = 0
+DEBUG_VERBOSE      = 0
+DEBUG_VERY_VERBOSE = 0
+
+
+# look for all N headers in ABUSED_HEADERS existing, and having the
+# same value.
+def checkForAbusiveHeaders( headers ):
+
+    compare = None
+    for header in ABUSED_HEADERS:
+        try:
+            if DEBUG_VERY_VERBOSE:
+                print "examining header %s" % header
+            value = headers[ header ]
+            if compare == None:
+                compare = value
+            else:
+                if compare != value:
+                    if DEBUG_VERBOSE:
+                        print "file is ok: %s != %s" % ( compare, value )
+                    return False
+        except:
+            if DEBUG_VERBOSE:
+                print "file is ok: no value for %s" % header
+            return False
+
+    return True
+
+
+def checkForAbusiveOriginators( headers ):
+
+    for originator in ABUSIVE_ORIGINATORS:
+        try:
+            if orginator == headers[ HEADER_ORGINATOR ]:
+                return True
+        except:
+            return False
+
+    return False
+
+
+def renamingScheme( infile ):
+
+    # hard-coded by GNATS
+    return IGNORE_FILE_PREFIX + infile
+
+
+def renameFile( filename ):
+
+    newfilename = renamingScheme( filename )
+    try:
+        os.rename( filename, renamingScheme( filename ) )
+        print "suspicious queued PR %s" % filename + \
+            " renamed to %s for manual inspection" % newfilename
+    except Exception, e:
+        print "could not rename suspected spam PR %s:" % filename
+        print str( e )
+        pass
+
+
+def handleFiles( dir ):
+
+    try:
+        # dunno why I have to do this, but rename fails with a full pathname
+        os.chdir( dir )
+        filenames = os.listdir( "." )
+        for filename in filenames:
+            if string.find( filename, IGNORE_FILE_PREFIX ) == 0:
+                if DEBUG_VERBOSE:
+                    print "skippping file %s" % filename
+            else:
+                if DEBUG_VERBOSE:
+                    print "handling file %s" % filename
+                try:
+                    handleFile( filename )
+                except Exception, e:
+                    print "could not handle file %s:" % filename
+                    print str( e )
+    except Exception, e:
+        print "could not list directory %s:" % dir
+        print str( e )
+
+
+def handleFile( filename ):
+
+    infile = None
+    try:
+        infile = file( filename )
+    except Exception, e:
+        print "could not open %s:" % filename
+        print str( e )
+        return
+
+    headers = {}
+
+    while 1:
+        try:
+            line = infile.readline( LINELEN )
+            if len( line ) == 0:
+                break
+            else:
+                if DEBUG_LINE:
+                    print 'line: ' + line
+
+                tokens = string.split( line )
+                if len( tokens ) > 1:
+                    # TODO add more analysis in the future?
+                    header = tokens[ 0 ]
+                    if header in INTERESTING_HEADERS:
+                        headers[ header ] = tokens[ 1 : ]
+
+            # endif len( line ) == 0
+
+        except EOFError:
+            break
+        except IOError, e:
+            print 'handleFile: IOError:'
+            print e
+            break
+    # end while 1 (read line)
+
+    try:
+        infile.close()
+    except:
+        pass
+
+    # run algorithms and use results to determine whether to rename file
+    hasAbusiveHeaders = checkForAbusiveHeaders( headers )
+    hasAbusiveOriginator = checkForAbusiveOriginators( headers )
+
+    # TODO add more tests as appropriate in the future
+
+    if hasAbusiveHeaders or hasAbusiveOriginator:
+        renameFile( filename )
+
+
+# main
+
+if __name__ == '__main__':
+
+    dir = QUEUE_DIR
+
+    if len( sys.argv ) > 1:
+        dir = sys.argv[ 1 ]
+
+    handleFiles( dir )
+


More information about the svn-src-user mailing list