summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Antanas Uršulis <antanas.ursulis@gmail.com> 2013-07-29 19:02:17 +0300
committer Antanas Uršulis <antanas.ursulis@gmail.com> 2013-07-29 19:02:17 +0300
commit 8dfea24b40c34292f20ab60975d3585094b70cb0 (patch)
tree f80a5ff51470fcfc6ac8ff24412efda785ee5582
parent Port simple_client to urllib2, so that we can set Content-Type (diff)
download log-analysis-8dfea24b40c34292f20ab60975d3585094b70cb0.tar.gz
log-analysis-8dfea24b40c34292f20ab60975d3585094b70cb0.tar.bz2
log-analysis-8dfea24b40c34292f20ab60975d3585094b70cb0.zip
Introduce processors and PortageProcessor.
A processor is initialised with a database and a storage provider. It should implement the process(request, source) method, where request is a protobuf Submission message. process() should analyse the received files, perform any required transformations, and usually store the files and create appropriate database entries. Processors are multiplexed through the 'provider' field of the protobuf Submission message. This allows different types of logs to be processed/analysed differently.
-rw-r--r-- flask_app.py 8
-rw-r--r-- portage_processor.py 74
-rw-r--r-- simple_client.py 1
-rw-r--r-- submission.proto 3
4 files changed, 82 insertions, 4 deletions
diff --git a/flask_app.py b/flask_app.py
index ce55c38..832702c 100644
--- a/flask_app.py
+++ b/flask_app.py
@@ -7,8 +7,11 @@ import os, socket
import submission_pb2, storage
from flask import Flask, request
+from portage_processor import PortageProcessor
+
app = Flask(__name__)
store = storage.FilesystemStorage('logs/')
+processors = {'portage' : PortageProcessor(None, store)} # TODO: initialise from config file
@app.route('/')
def index():
@@ -19,9 +22,8 @@ def submit():
submission = submission_pb2.Submission()
submission.ParseFromString(request.data)
source = socket.getfqdn(request.remote_addr) # TODO: is this ok?
- # TODO: pass through analyser
- for f in submission.files:
- store.save_file(source, f.filename, f.data)
+
+ processors[submission.provider].process(submission, source)
return ''
if __name__ == '__main__':
diff --git a/portage_processor.py b/portage_processor.py
new file mode 100644
index 0000000..2403cdf
--- /dev/null
+++ b/portage_processor.py
@@ -0,0 +1,74 @@
+import re, StringIO
+
+class PortageProcessor:
+ _r = {
+ 'warnings' : re.compile(r"(Tinderbox QA Warning!|QA Notice: (Pre-stripped|file does not exist|command not found|USE flag|Files built without respecting|The following files)|linux_config_exists|will always overflow|called with bigger|maintainer mode detected|econf called in src_compile|udev rules should be installed)"),
+ 'testfailed' : re.compile(r"^ \* ERROR: .* failed \(test phase\):"),
+ 'failed' : re.compile(r"^ \* ERROR: .* failed"),
+ 'collision' : re.compile(r"Detected file collision"),
+ 'maintainer' : re.compile(r"^ \* Maintainer: ([a-zA-Z0-9.@_+-]+)(?: ([a-zA-Z0-9.@_+,-]+))?$"),
+ 'escapes' : re.compile(r"\x1b\[[^\x40-\x7e]*[\x40-\x7e]")
+ }
+
+ def __init__(self, db, storage):
+ self.db = db
+ self.storage = storage
+
+ def process(self, request, source):
+ for f in request.files:
+ matches = 0
+ pkg_failed = False
+ test_failed = False
+ collision = False
+ bug_assignee = 'bug-wranglers@gentoo.org'
+ bug_cc = ''
+
+ # TODO: look at proper HTML generation methods:
+ # (*) either XHTML via xml.etree
+ # (*) or Jinja2 (is it possible to parse and generate in one pass?)
+ output = StringIO.StringIO()
+ output.write('''\
+<!doctype html>
+<html>
+ <head>
+ <link rel="stylesheet" type="text/css" href="htmlgrep.css">
+ </head>
+ <body>
+ <ol>
+''')
+
+ for line in f.data.split("\n"):
+ match = False
+
+ line = self._r['escapes'].sub('', line)
+
+ if self._r['warnings'].search(line):
+ match = True
+ elif self._r['testfailed'].search(line):
+ test_failed = True
+ match = True
+ elif self._r['failed'].search(line):
+ pkg_failed = True
+ match = True
+ elif self._r['collision'].search(line):
+ pkg_failed = True
+ collision = True
+ match = True
+ else:
+ m = self._r['maintainer'].search(line)
+ if m:
+ bug_assignee, bug_cc = m.group(1, 2)
+
+ if match:
+ matches += 1
+ output.write('\t'*3 + '<li class="match">' + line + '</li>\n')
+ else:
+ output.write('\t'*3 + '<li>' + line + '</li>\n')
+
+ output.write('''\
+ </ol>
+ </body>
+</html>
+''')
+
+ self.storage.save_file(source, f.filename, output.getvalue())
diff --git a/simple_client.py b/simple_client.py
index 99a4116..ab4bccf 100644
--- a/simple_client.py
+++ b/simple_client.py
@@ -6,6 +6,7 @@ import submission_pb2, sys, urllib2, os
def send_submission(filenames):
submission = submission_pb2.Submission()
+ submission.provider = "portage"
for f in filenames:
new_file = submission.files.add()
diff --git a/submission.proto b/submission.proto
index b06310f..42cf97c 100644
--- a/submission.proto
+++ b/submission.proto
@@ -4,5 +4,6 @@ message Submission {
required bytes data = 2;
}
- repeated File files = 1;
+ required string provider = 1;
+ repeated File files = 2;
}