svn commit: r246971 - user/bugmeister/gnats/tools
Mark Linimon
linimon at FreeBSD.org
Tue Feb 19 02:37:42 UTC 2013
Author: linimon (doc,ports committer)
Date: Tue Feb 19 02:37:41 2013
New Revision: 246971
URL: http://svnweb.freebsd.org/changeset/base/246971
Log:
Ad-hoc script by linimon to rank the goodness of incoming raw data.
Added:
user/bugmeister/gnats/tools/abuseHandler.py (contents, props changed)
Added: user/bugmeister/gnats/tools/abuseHandler.py
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/bugmeister/gnats/tools/abuseHandler.py Tue Feb 19 02:37:41 2013 (r246971)
@@ -0,0 +1,189 @@
+"""Go through a set of submitted PRs in a directory and rename
+ them for manual handling if they meet certain spammy criteria.
+ Original author: Mark Linimon, Lonesome Dove Computing Services,
+ www.lonesome.com. License: BSD license.
+ $FreeBSD$
+"""
+
+__version__ = "0.02"
+
# Names exported by "from abuseHandler import *".  The previous value named
# "abuseHandler", which is not defined in this module, so a star-import
# raised AttributeError.
__all__ = [
    "checkForAbusiveHeaders",
    "checkForAbusiveOriginators",
    "renamingScheme",
    "renameFile",
    "handleFiles",
    "handleFile",
]
+
+import os
+import string
+import sys
+
# Default directory scanned when no directory is given on the command line.
QUEUE_DIR = "/home/gnats/gnats-queue"

# Header tags as they appear as the first whitespace-separated token of a line.
HEADER_SYNOPSIS = ">Synopsis:"
# The GNATS field is spelled ">Originator:"; the former value ">Orginator:"
# could never match a line of a real PR.
HEADER_ORIGINATOR = ">Originator:"
# Misspelled legacy name, kept so that existing references keep working.
HEADER_ORGINATOR = HEADER_ORIGINATOR
HEADER_RELEASE = ">Release:"

# Headers whose values are collected while reading a PR.
INTERESTING_HEADERS = (
    HEADER_SYNOPSIS,
    HEADER_ORIGINATOR,
    HEADER_RELEASE,
)

# Headers that must all be present and carry the same value for a PR to be
# considered spammy.
ABUSED_HEADERS = (
    HEADER_SYNOPSIS,
    HEADER_ORIGINATOR,
    HEADER_RELEASE,
)

# Originator values that mark a PR as suspicious on their own.
ABUSIVE_ORIGINATORS = (
    "Perry Keller",
)

# Files whose name starts with this prefix are skipped; suspicious PRs are
# renamed to start with it.
IGNORE_FILE_PREFIX = "."

LINELEN = 1024 # try to prevent buffer overruns when reading files

# Debug switches; set to a true value to enable the extra output.
DEBUG_LINE = 0
DEBUG_VERBOSE = 0
DEBUG_VERY_VERBOSE = 0
+
+
def checkForAbusiveHeaders( headers ):
    """Return True if every header in ABUSED_HEADERS exists in *headers*
    and all of them have the same value.

    headers -- mapping of header tag to its parsed value.

    Returns False as soon as one of the headers is missing or differs
    from the first one seen.
    """

    compare = None
    for header in ABUSED_HEADERS:
        if DEBUG_VERY_VERBOSE:
            print( "examining header %s" % header )

        # Only a missing header counts as "file is ok"; any other error is
        # a real problem and is no longer silently swallowed.
        try:
            value = headers[ header ]
        except ( KeyError, TypeError ):
            if DEBUG_VERBOSE:
                print( "file is ok: no value for %s" % header )
            return False

        if compare is None:
            # first header seen: remember its value as the reference
            compare = value
        elif compare != value:
            if DEBUG_VERBOSE:
                print( "file is ok: %s != %s" % ( compare, value ) )
            return False

    return True
+
+
def checkForAbusiveOriginators( headers ):
    """Return True if the originator header in *headers* is one of
    ABUSIVE_ORIGINATORS.

    headers -- mapping of header tag to its parsed value.  The value may
               be a plain string or a list/tuple of whitespace-separated
               tokens; tokens are joined with single spaces before the
               comparison.

    Returns False when the originator header is absent.
    """

    # The previous version compared the undefined name "orginator"; the
    # resulting NameError was swallowed, so the check could never succeed.
    try:
        value = headers[ HEADER_ORGINATOR ]
    except ( KeyError, TypeError ):
        return False

    if isinstance( value, ( list, tuple ) ):
        value = " ".join( value )

    return value in ABUSIVE_ORIGINATORS
+
+
def renamingScheme( infile ):
    """Return the name *infile* gets when it is set aside for manual
    handling: the same path with IGNORE_FILE_PREFIX prepended to the
    file name.

    infile -- bare file name or a path including directories.

    The prefix is applied to the last path component only, so
    "queue/123" maps to "queue/.123".  Prefixing the whole string would
    produce a path into a different directory.
    """

    # the prefix itself is hard-coded by GNATS
    directory, basename = os.path.split( infile )
    return os.path.join( directory, IGNORE_FILE_PREFIX + basename )
+
+
def renameFile( filename ):
    """Rename *filename* to the name given by renamingScheme() and report
    the outcome on standard output.

    filename -- name of the queued PR to set aside.

    A failing rename is reported and otherwise ignored.
    """

    newfilename = renamingScheme( filename )
    try:
        os.rename( filename, newfilename )
    except OSError as e:
        print( "could not rename suspected spam PR %s:" % filename )
        print( str( e ) )
    else:
        print( "suspicious queued PR %s renamed to %s for manual inspection"
               % ( filename, newfilename ) )
+
+
def handleFiles( dir ):
    """Run handleFile() on every file in directory *dir* whose name does
    not start with IGNORE_FILE_PREFIX.

    dir -- path of the directory to scan.

    Side effect: the process's current working directory is changed to
    *dir*.  Problems are reported on standard output; a failure on one
    file does not stop the processing of the remaining files.
    """

    try:
        # Work inside the directory so that the bare names returned by
        # listdir() can be opened and renamed as they are.
        os.chdir( dir )
        filenames = os.listdir( "." )
    except OSError as e:
        print( "could not list directory %s:" % dir )
        print( str( e ) )
        return

    for filename in filenames:
        if filename.startswith( IGNORE_FILE_PREFIX ):
            if DEBUG_VERBOSE:
                print( "skippping file %s" % filename )
            continue

        if DEBUG_VERBOSE:
            print( "handling file %s" % filename )
        try:
            handleFile( filename )
        except Exception as e:
            # best effort: report and carry on with the next file
            print( "could not handle file %s:" % filename )
            print( str( e ) )
+
+
def handleFile( filename ):
    """Read the PR in *filename*, collect its interesting headers and
    rename the file if it looks abusive.

    filename -- name of the queued PR to examine.

    A line contributes a header when its first whitespace-separated token
    is in INTERESTING_HEADERS and at least one more token follows; the
    remaining tokens are stored as a list.  If the same header occurs more
    than once, the last occurrence wins.  A file that cannot be opened is
    reported and skipped.
    """

    try:
        infile = open( filename )
    except ( IOError, OSError ) as e:
        print( "could not open %s:" % filename )
        print( str( e ) )
        return

    headers = {}

    # "with" guarantees that the file is closed however the loop ends
    with infile:
        try:
            while True:
                # read at most LINELEN characters at a time
                line = infile.readline( LINELEN )
                if not line:
                    break

                if DEBUG_LINE:
                    print( 'line: ' + line )

                tokens = line.split()
                if len( tokens ) > 1:
                    # TODO add more analysis in the future?
                    header = tokens[ 0 ]
                    if header in INTERESTING_HEADERS:
                        headers[ header ] = tokens[ 1 : ]
        except IOError as e:
            # keep whatever was read so far and evaluate that
            print( 'handleFile: IOError:' )
            print( e )

    # run algorithms and use results to determine whether to rename file
    hasAbusiveHeaders = checkForAbusiveHeaders( headers )
    hasAbusiveOriginator = checkForAbusiveOriginators( headers )

    # TODO add more tests as appropriate in the future

    if hasAbusiveHeaders or hasAbusiveOriginator:
        renameFile( filename )
+
+
# main

if __name__ == '__main__':

    # An optional first command-line argument overrides the default queue
    # directory.  (The name avoids shadowing the builtin dir().)
    queueDir = sys.argv[ 1 ] if len( sys.argv ) > 1 else QUEUE_DIR

    handleFiles( queueDir )
+
More information about the svn-src-user
mailing list