# filetypes/makefilecom.py -- lexer for the text of a makefile command.
from ply import lex
from ply import yacc


def com_interp(string):
    """Tokenize *string* with a PLY lexer and return the tokens found.

    Work in progress: besides returning the tokens, the function still
    prints one trace line per token, the token count, and an extra marker
    when exactly one token was produced.

    Args:
        string: Text to run through the lexer.

    Returns:
        A list of the tokens the lexer produced, in input order.
    """
    # NOTE: lex.lex() below builds the lexer from the names defined in this
    # function ("tokens", "states" and every "t_*" rule) and uses each
    # rule's docstring as its regular expression.  Keep those names intact
    # and document the rules with comments, never with docstrings.
    tokens = (
        "COMMAND",
        "COMMA",
        "EQ",
        "TEXT",
        "PERCENT",
    )
    states = (
        ("ccode", "exclusive"),  # command code
    )

    # "$(" or "${" opens a command: remember where its text starts and
    # switch to the exclusive 'ccode' state.
    def t_ccode(t):
        r'\$(\{|\()'
        t.lexer.code_start = t.lexer.lexpos  # first char after the opener
        t.lexer.level = 1                    # nesting depth
        t.lexer.begin('ccode')

    # A nested "$(" / "${" inside a command only deepens the nesting.
    def t_ccode_newcom(t):
        r'\$(\{|\()'
        t.lexer.level += 1

    # ")" or "}" closes one nesting level.
    def t_ccode_endcom(t):
        r'(\}|\))'
        t.lexer.level -= 1

        # Outermost closer: emit everything between the delimiters as one
        # COMMAND token (lexpos - 1 leaves the closer itself out).
        if t.lexer.level == 0:
            t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos - 1]
            t.type = "COMMAND"
            t.lexer.begin('INITIAL')
            return t

    # Any other text inside a command is skipped; it is recovered from
    # lexdata when the command closes.  Raw string: the escapes belong to
    # the regex, not to the Python literal.
    def t_ccode_text(t):
        r"[^\$\(\{\)\}]+"

    def t_PERCENT(t):
        r"\%"
        return t

    def t_EQ(t):
        r"="
        return t

    def t_COMMA(t):
        r","
        return t

    def t_TEXT(t):
        r"[^ \n\t:=\\,]+"
        return t

    # Blanks between tokens are dropped.
    def t_spacetab(t):
        r"[ \t]"

    # Shared by every state: report the offending character and move on.
    def t_ANY_error(t):
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()
    lexer.input(string)

    # The counter gets its own name: it used to rebind "tokens", the tuple
    # that declares the token types.
    found = []
    for tok in lexer:
        found.append(tok)
        print("gethere")  # temporary debug trace

    print(len(found))
    if len(found) == 1:
        print("gapp")  # temporary debug trace

    return found


# Manual smoke test; guarded so that importing com_interp from another
# module does not run the lexer and print debug output.
if __name__ == "__main__":
    com_interp("HELOO")
# filetypes/makefiles.py -- makefile lexer plus a work-in-progress grammar.
from ply import lex
from ply import yacc
from makefilecom import com_interp


def scanmakefile(makefile):
    """Tokenize the text of a makefile and print every token.

    The yacc grammar defined further down is not run yet (the parse step is
    still disabled), so for now only the lexer is exercised.

    Args:
        makefile: Full text of the makefile.

    Returns:
        The list of build targets, ``[[target, deps, options], ...]``.  It
        stays empty until the parse step is enabled.
    """
    # NOTE: lex.lex() / yacc.yacc() build their tables from the names
    # defined in this function ("tokens", "states", every "t_*" and "p_*"
    # function) and use each rule's docstring as its regular expression or
    # grammar.  Document rules with comments, never with docstrings, and do
    # not name helpers "t_..." or "p_...".
    tokens = (
        "VAR",
        "DOTVAR",
        "END",
        "COL",
        "SEMICOL",
        "EQ",
        "PEQ",
        "CEQ",
        "QEQ",
        "TEXT",
        "COMMAND",
        "PERCENT",
        "ENDTAB",
        "LIT",
        "COMMA",
    )

    states = (
        ("com", "exclusive"),
        ("ccode", "exclusive"),  # command code
    )

    # "$(" or "${" opens a command: remember where its text starts and
    # switch to the exclusive 'ccode' state.
    def t_ccode(t):
        r'\$(\{|\()'
        t.lexer.code_start = t.lexer.lexpos  # first char after the opener
        t.lexer.level = 1                    # nesting depth
        t.lexer.begin('ccode')

    # A nested "$(" / "${" inside a command only deepens the nesting.
    def t_ccode_newcom(t):
        r'\$(\{|\()'
        t.lexer.level += 1

    # ")" or "}" closes one nesting level.
    def t_ccode_endcom(t):
        r'(\}|\))'
        t.lexer.level -= 1

        # Outermost closer: emit everything between the delimiters as one
        # COMMAND token (lexpos - 1 leaves the closer itself out).
        if t.lexer.level == 0:
            t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos - 1]
            t.type = "COMMAND"
            t.lexer.begin('INITIAL')
            return t

    # Any other text inside a command is skipped; it is recovered from
    # lexdata when the command closes.  Raw string: the escapes belong to
    # the regex, not to the Python literal.
    def t_ccode_text(t):
        r"[^\$\(\{\)\}]+"

    # "#" starts a comment, handled in the exclusive 'com' state.
    def t_begin_com(t):
        r"\#"
        t.lexer.push_state("com")

    # Comment text up to a newline or backslash is dropped.  The class used
    # to read "[^(\n|\\)]", which also excluded "(", "|" and ")" and made
    # comments containing them trigger the error rule.
    def t_com_other(t):
        r"[^\n\\]+"

    # A backslash-newline continues the comment on the next line.
    def t_com_newline(t):
        r".*\\\n"
        t.lexer.lineno += 1

    # The newline ending a comment is reported as END.
    def t_com_END(t):
        r"\n"
        t.lexer.pop_state()
        t.lexer.lineno += 1
        return t

    def t_EQ(t):
        r"="
        return t

    def t_COL(t):
        r":"
        return t

    def t_SEMICOL(t):
        r";"
        return t

    # Text containing "!=" is swallowed without producing a token.
    def t_bsdexe(t):  # Create a cleaner version
        r".*\!=.*"

    def t_PERCENT(t):
        r"\%"
        return t

    # The four assignment rules below match "NAME <op>" and keep only NAME
    # as the token value.
    def t_PEQ(t):
        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\+="
        t.value = t.value.split()[0].rstrip("+=")
        return t

    def t_CEQ(t):
        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*:="
        t.value = t.value.split()[0].rstrip(":=")
        return t

    def t_QEQ(t):
        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*\?="
        t.value = t.value.split()[0].rstrip("?=")
        return t

    def t_VAR(t):
        r"[a-zA-Z_][a-zA-Z0-9_]*[ \t]*="
        t.value = t.value.split()[0].rstrip("=")  # get the name of the var
        return t

    # Same as VAR, for names that start with a dot.
    def t_DOTVAR(t):
        r"\.[a-zA-Z_][a-zA-Z0-9_]*[ \t]*="
        t.value = t.value.split()[0].rstrip("=")  # get the name of the var
        return t

    # A backslash-newline joins two lines; no token is produced.
    def t_contline(t):
        r"\\\n"
        t.lexer.lineno += 1

    def t_LIT(t):
        r"\\."
        t.value = t.value[1]  # take the literal char
        return t

    def t_COMMA(t):
        r","
        return t

    # Blanks between tokens are dropped.
    def t_spacetab(t):
        r"[ \t]"

    # A newline followed by a tab.
    def t_ENDTAB(t):
        r"\n\t"
        t.lexer.lineno += 1
        return t

    def t_TEXT(t):
        r"[^ \n\t:\\,]+"
        return t

    def t_END(t):
        r"\n+"
        t.lexer.lineno += t.value.count('\n')
        return t

    # Shared by every state: report the offending character and move on.
    def t_ANY_error(t):
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)

    lexer = lex.lex()

    # Debug dump of the token stream.
    lexer.input(makefile)
    for tok in lexer:
        print(tok)

    # YACC begins here

    # a dict with values of defined variables
    variables = {}
    ivars = []  # keep track of the immediate variables
    targets = []  # buildtargets, [[target,deps,options],[target2,....

    def _name_and_value(p):
        # Each assignment rule has two forms, "<OP> textlst end" (len 4)
        # and "end <OP> textlst end" (len 5); pick name and value by form.
        if len(p) == 4:
            return p[1], p[2]
        return p[2], p[3]

    def p_peq(p):  # immediate if peq was defined as immediate before else deferred
        """
        end : end PEQ textlst end
            | PEQ textlst end
        """
        name, value = _name_and_value(p)
        if name not in variables:
            variables[name] = value
        elif name not in ivars:
            variables[name] += value
        else:
            # Immediate variable: append the expanded text.  This used to
            # assign, throwing away the value the variable already had.
            variables[name] += expand(value)  # expand any variables

    def p_ceq(p):  # immediate
        """
        end : end CEQ textlst end
            | CEQ textlst end
        """
        name, value = _name_and_value(p)
        variables[name] = expand(value)  # expand any variables
        ivars.append(name)

    def p_qeq(p):  # deferred
        """
        end : end QEQ textlst end
            | QEQ textlst end
        """
        # Only assign when the variable is still undefined.  The old
        # if/elif chain fell through to test the textlst (a list) as a
        # dictionary key whenever the short form named a defined variable.
        name, value = _name_and_value(p)
        if name not in variables:
            variables[name] = value

    def p_var(p):  # deferred
        """
        end : end VAR textlst end
            | VAR textlst end
        """
        name, value = _name_and_value(p)
        variables[name] = value

    def p_textlst(p):
        """
        textlst : textlst TEXT
                | textlst command
                | textlst LIT
                | command
                | TEXT
                | LIT
        """
        if len(p) == 3:
            # list.append() returns None, so build the extended list
            # explicitly instead of assigning its result.
            p[0] = p[1] + [p[2]]
        else:
            p[0] = [p[1]]

    def p_command(p):
        "command : COMMAND"
        # The space before ":" is required by the grammar-string format.
        p[0] = [p[1]]  # commands are lists within the textlst

    def p_end(p):
        """
        end : END
            | end END
        """

    def p_error(p):
        # p is None when the error is detected at the end of the input.
        if p is None:
            print("syntax error at end of input")
        else:
            print("syntax error at '%s'" % p.type, p.lexpos)

    # TODO: the parse step is disabled while the grammar is being
    # rewritten; enable it again once the grammar is complete:
    #     yacc.yacc()
    #     yacc.parse(makefile)

    # Restored so callers keep receiving a list, as they did before the
    # parse step was disabled.
    return targets


# Assignment flavours handled by the grammar: ":=" is immediate, "=" and
# "?=" are deferred, "+=" follows the flavour the variable already has.

def expand(lst):
    """Return a copy of *lst* with every command item interpreted.

    Items of a text list are either plain values or one-element lists
    holding the text of a command (see the "command" grammar rule).

    Args:
        lst: The text list to expand.

    Returns:
        A new list: plain items are copied unchanged and each command item
        is replaced by whatever com_interp() returns for its text.
    """
    newlst = []
    for item in lst:
        if isinstance(item, list):
            interpreted = com_interp(item[0])
            # Tolerate a com_interp() that returns nothing: the command
            # then contributes no items instead of raising a TypeError.
            if interpreted:
                newlst.extend(interpreted)
        else:
            # append, not "+=": extending a list with a string would add
            # it one character at a time.
            newlst.append(item)

    return newlst


# Manual test driver; guarded so that importing this module does not try
# to open a file from the current directory.
if __name__ == "__main__":
    makefile_path = "Makefile2"
    with open(makefile_path, encoding="utf-8", errors="replace") as inputfile:
        scanmakefile(inputfile.read())