Diffstat (limited to 'py/log_parser.py')
-rw-r--r-- | py/log_parser.py | 206
1 file changed, 105 insertions, 101 deletions
diff --git a/py/log_parser.py b/py/log_parser.py
index eb081a9..e3725bb 100644
--- a/py/log_parser.py
+++ b/py/log_parser.py
@@ -7,113 +7,113 @@ import re
 import io
 import json
 import os
-from sqlalchemy.ext.declarative import declarative_base
-import sqlalchemy as sa
 import argparse
 
-Base = declarative_base()
-
-class ProjectsPattern(Base):
-    __tablename__ = "projects_pattern"
-    id = sa.Column(sa.Integer, primary_key=True)
-    project_uuid = sa.Column(sa.String(36), nullable=False)
-    search = sa.Column(sa.String(50), nullable=False)
-    start = sa.Column(sa.Integer, default=0)
-    end = sa.Column(sa.Integer, default=0)
-    status = sa.Column(sa.Enum('info', 'warning', 'ignore', 'error'), default='info')
-    type = sa.Column(sa.Enum('info', 'qa', 'compile', 'configure', 'install', 'postinst', 'prepare', 'pretend', 'setup', 'test', 'unpack', 'ignore', 'issues', 'misc', 'elog'), default='info')
-    search_type = sa.Column(sa.Enum('in', 'startswith', 'endswith', 'search'), default='in')
+def getJsonFromFile(path, phase):
+    with open(path) as f:
+        try:
+            JsonList = json.load(f)
+        except json.decoder.JSONDecodeError as e:
+            print(f"{e} in file: {path}")
+            return []
+    return JsonList[phase]
 
-def get_pattern_dict(project_pattern):
+def get_pattern_dict(catchissue, i, uuid):
+    #print(catchissue)
     patten_dict = {}
-    patten_dict['id'] = project_pattern.id
-    patten_dict['project_uuid'] = project_pattern.project_uuid
-    patten_dict['search'] = project_pattern.search
-    patten_dict['status'] = project_pattern.status
-    patten_dict['type'] = project_pattern.type
+    patten_dict['line'] = i
+    patten_dict['uuid'] = uuid
+    patten_dict['string'] = catchissue[0]
+    patten_dict['start'] = catchissue[1]
+    patten_dict['end'] = catchissue[2]
+    patten_dict['status'] = catchissue[3]
+    patten_dict['search_type'] = catchissue[4]
+    patten_dict['url'] = catchissue[5]
+    patten_dict['description'] = catchissue[6]
     return patten_dict
 
-def addPatternToList(Session, log_search_pattern, uuid):
-    for project_pattern in Session.query(ProjectsPattern).filter_by(project_uuid=uuid).all():
-        # check if the search pattern is vaild
-        project_pattern_search = project_pattern.search
-        try:
-            re.compile(project_pattern_search)
-        except re.error:
-            print("Non valid regex pattern")
-            print(project_pattern.search)
-            print(project_pattern.id)
+def addPatternToList(phaseList, log_search_patterns, uuid):
+    for phase in phaseList:
+        if uuid == '00000000-0000-0000-000000000000':
+            path = 'LogPattern'
         else:
-            if project_pattern.type == 'ignore':
-                log_search_pattern['ignore'].append(get_pattern_dict(project_pattern))
-            if project_pattern.type == 'test':
-                log_search_pattern['test'].append(get_pattern_dict(project_pattern))
+            path = os.path.join('Project', uuid, 'LogPattern')
+        CatchIssueFile = os.path.join(path, 'CatchIssues' + phase + '.json')
+        CatchIssueList = getJsonFromFile(CatchIssueFile, phase)
+        i = 3
+        for catchissue in CatchIssueList:
+            search_pattern = get_pattern_dict(catchissue, i, uuid)
+            try:
+                re.compile(search_pattern['string'])
+            except re.error:
+                print(f"Non valid regex pattern in line: {str(search_pattern['line'])} String: {search_pattern['string']} Project: {search_pattern['uuid']} Phase: {phase}")
             else:
-                log_search_pattern['default'].append(get_pattern_dict(project_pattern))
-    return log_search_pattern
+                log_search_patterns[phase].append(search_pattern)
+            i = i + 1
+    return log_search_patterns
 
-def get_log_search_pattern(Session, uuid, default_uuid):
+def get_log_search_patterns(uuid):
+    path = os.path.join('LogPattern', 'PhaseList.json')
+    PhaseList = getJsonFromFile(path, 'PhaseList')
     # get pattern from the projects and add that to log_search_pattern
-    log_search_pattern = {}
-    log_search_pattern['ignore'] = []
-    log_search_pattern['default'] = []
-    log_search_pattern['test'] = []
-    log_search_pattern = addPatternToList(Session, log_search_pattern, uuid)
-    log_search_pattern = addPatternToList(Session, log_search_pattern, default_uuid)
-    return log_search_pattern
+    log_search_patterns = {}
+    for phase in PhaseList:
+        log_search_patterns[phase] = []
+    uuid_default = '00000000-0000-0000-000000000000'
+    log_search_patterns = addPatternToList(PhaseList, log_search_patterns, uuid_default)
+    #log_search_patterns = addPatternToList(PhaseList, log_search_patterns, uuid)
+    return log_search_patterns
 
-def get_search_pattern_match(log_search_pattern, text_line):
-    match_list = []
-    for search_pattern in log_search_pattern:
-        if re.search(search_pattern['search'], text_line):
-            match_list.append(search_pattern)
-    return match_list
+def get_search_pattern_match(search_pattern, text_line):
+    #print(f"Text: {text_line}")
+    if search_pattern['search_type'] == 'search':
+        if re.search(search_pattern['string'], text_line):
+            #print(f"Match string: {search_pattern['string']} Type: {search_pattern['search_type']}")
+            return True
+    elif search_pattern['search_type'] == 'startswith':
+        if text_line.startswith(search_pattern['string']):
+            #print(f"Match string: {search_pattern['string']} Type: {search_pattern['search_type']}")
+            return True
+    elif search_pattern['search_type'] == 'endswith':
+        if text_line.endswith(search_pattern['string']):
+            #print(f"Match string: {search_pattern['string']} Type: {search_pattern['search_type']}")
+            return True
+    elif search_pattern['search_type'] == 'in':
+        if search_pattern['string'] in text_line:
+            #print(f"Match string: {search_pattern['string']} Type: {search_pattern['search_type']}")
+            return True
+    else:
+        return False
 
-def search_buildlog(log_search_pattern, text_line, index):
+def search_buildlog(log_search_patterns, text_line, index):
     summary = {}
-    #FIXME: add check for test
-    # don't log ignore lines
-    if get_search_pattern_match(log_search_pattern['ignore'], text_line) != []:
-        return False
-    # search default pattern
     summary[index] = {
         'text' : text_line,
         'pattern_info' : [],
         }
-    search_pattern_match_list = get_search_pattern_match(log_search_pattern['default'], text_line)
-    if search_pattern_match_list != []:
-        for search_pattern_match in search_pattern_match_list:
-            summary[index]['pattern_info'].append({
-                'type' : search_pattern_match['type'],
-                'status' : search_pattern_match['status'],
-                'id' : search_pattern_match['id'],
-                'search_pattern' : search_pattern_match['search'],
-                })
-    # we add all line that start with ' * ' or '>>>' as info
-    if text_line.startswith(' * ') or text_line.startswith('>>>'):
-        summary[index]['pattern_info'].append({
-            'type' : 'info',
-            'status' : 'info',
-            'id' : 0,
-            'search_pattern' : 'auto',
-            })
+    for phase, search_patterns in log_search_patterns.items():
+        for search_pattern in search_patterns:
+            match = get_search_pattern_match(search_pattern, text_line)
+            if phase == 'Ignore' and match:
+                return False
+            elif phase != 'Ignore' and match:
+                summary[index]['pattern_info'].append({
+                    'search_type' : search_pattern['search_type'],
+                    'status' : search_pattern['status'],
+                    'line' : search_pattern['line'],
+                    'search_pattern' : search_pattern['string'],
+                    'phase' : phase,
+                    'uuid' : search_pattern['uuid'],
+                    'url' : search_pattern['url'],
+                    'description' : search_pattern['description'],
+                    })
     if summary[index]['pattern_info'] != []:
+        #print(f"summary: {summary}")
         return summary
     return False
 
-def getConfigSettings():
-    #configpath = os.getcwd()
-    with open('logparser.json') as f:
-        config = json.load(f)
-    return config
-
-def getDBSession(args):
-    engine = sa.create_engine(args.database)
-    Session = sa.orm.sessionmaker(bind = engine)
-    return Session()
-
-def getMultiprocessingPool(config):
-    return Pool(processes = int(config['core']))
+def getMultiprocessingPool(args):
+    return Pool(processes = int(args.cpu))
 
 def getJsonResult(results):
     for r in results:
@@ -128,14 +128,11 @@ def getJsonResult(results):
 def runLogParser(args):
     index = 1
     logfile_text_dict = {}
-    config = getConfigSettings()
-    Session = getDBSession(args)
-    #mp_pool = getMultiprocessingPool(config)
     summary = {}
     #NOTE: The patten is from https://github.com/toralf/tinderbox/tree/master/data files.
-    # Is stored in a db instead of files.
-    log_search_pattern = get_log_search_pattern(Session, args.uuid, config['default_uuid'])
-    Session.close()
+    # Is stored in json files
+    # make dict with it
+    log_search_patterns = get_log_search_patterns(args.uuid)
     # read the log file to dict
     with open(args.file, encoding='utf8', errors='ignore') as f:
         for text_line in f:
@@ -146,21 +143,28 @@ def runLogParser(args):
 #        index = index + 1
     # run the search parse pattern on the text lines
     #params = [(log_search_pattern, text, line_index,) for line_index, text in logfile_text_dict.items()]
-    with getMultiprocessingPool(config) as pool:
-        results = list(pool.apply_async(search_buildlog, args=(log_search_pattern, text, line_index,)) for line_index, text in logfile_text_dict.items())
-        #results = pool.starmap(search_buildlog, params)
-        getJsonResult(results)
-        pool.close()
-        pool.join()
+    if not args.debug:
+        with getMultiprocessingPool(args) as pool:
+            results = list(pool.apply_async(search_buildlog, args=(log_search_patterns, text, line_index,)) for line_index, text in logfile_text_dict.items())
+            getJsonResult(results)
+            pool.close()
+            pool.join()
+    else:
+        results = []
+        for line_index, text in logfile_text_dict.items():
+            results.append(search_buildlog(log_search_patterns, text, line_index))
+        #FIXME: Json output
+        #getJsonResult(results)
 
 def main():
     # get filename, project_uuid default_project_uuid
     parser = argparse.ArgumentParser()
     parser.add_argument("-f", "--file", required=True)
     parser.add_argument("-u", "--uuid", required=True)
-    parser.add_argument("-e", "--default-uuid", required=False)
-    parser.add_argument("-c", "--cpu", required=False)
-    parser.add_argument("-d", "--database", required=True)
+    parser.add_argument("-c", "--cpu", required=True)
+    #FIXME: add If args.debug .... where it is needed
+    parser.add_argument("-d", "--debug", action="store_true", required=False)
+
     args = parser.parse_args()
     runLogParser(args)
     sys.exit()
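For reference, below is a minimal sketch of the JSON layout that the new getJsonFromFile() and get_pattern_dict() code appears to expect: LogPattern/PhaseList.json maps 'PhaseList' to the list of phase names, and each CatchIssues<phase>.json maps its phase name to a list of seven-element entries indexed as [string, start, end, status, search_type, url, description]. The phase names and pattern entries here are illustrative assumptions, not files taken from the repository.

# Hypothetical example only: writes pattern files in the layout the new
# code reads. All phase names and entries below are assumptions.
import json
import os

os.makedirs('LogPattern', exist_ok=True)

# PhaseList.json: getJsonFromFile(path, 'PhaseList') returns this list.
with open(os.path.join('LogPattern', 'PhaseList.json'), 'w') as f:
    json.dump({'PhaseList': ['Ignore', 'Compile']}, f, indent=4)

# One CatchIssues<phase>.json per phase; each entry matches the indexes
# used by get_pattern_dict():
# [string, start, end, status, search_type, url, description]
with open(os.path.join('LogPattern', 'CatchIssuesIgnore.json'), 'w') as f:
    json.dump({'Ignore': [
        ['\\* Messages for package', 0, 0, 'ignore', 'in', '', 'portage noise'],
    ]}, f, indent=4)
with open(os.path.join('LogPattern', 'CatchIssuesCompile.json'), 'w') as f:
    json.dump({'Compile': [
        ['error:', 0, 0, 'error', 'in', '', 'generic compiler error'],
        ['^ \\* QA Notice:', 0, 0, 'warning', 'search', '', 'portage QA notice'],
    ]}, f, indent=4)

With files like these in place, log_parser.py -f build.log -u 00000000-0000-0000-000000000000 -c 4 would match each log line against every phase's patterns and drop lines caught by the 'Ignore' phase. Note that only the default all-zero UUID's patterns are loaded at this point; the per-project addPatternToList() call is still commented out in get_log_search_patterns().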