#!/usr/bin/env python3
# note that level is used to make a thing that kind of looks like a tree
"""
Devin Wild Thomas
dwt@cs.unh.edu

adapted from OCaml implementation by
Wheeler Ruml, ruml at cs.unh.edu, who wrote the body of this comment

Here's the grammar:

expr -> ( expr op expr )
     -> number
op -> +, -, *, or /
number -> [0-9.]+

Notice that the only choice is how to parse an expr, and this can be
determined by looking at its first character (is it a '('?).

Note how the structure of the parsing functions directly reflects the
structure of the grammar.  Most parsing functions return a float and
the index to continue parsing from.

Recursion is used wherever the right-hand side of a grammar rule
mentions its own left-hand non-terminal.  This is why this style of
hand-coded parser is called a "recursive-descent" parser.

This example code does little error checking and will probably throw
an exception (eg, access the input string out of bounds) on malformed
input.
"""

import sys

# Maps each operator character of the grammar to the binary function it denotes.
op_lookup = {
    "+": lambda x, y: x + y,  # this sort of thing is why you might want to parse in python
    "-": lambda x, y: x - y,
    "*": lambda x, y: x * y,
    "/": lambda x, y: x / y,
}


def parse_op(s, i, level):
    """Parse the single-character operator at s[i].

    s     - string being parsed
    i     - index of operator to parse in s
    level - indentation prefix printed before the trace output

    Returns (operator, next_i), where operator is the two-argument
    function from op_lookup and next_i is i + 1.

    Raises KeyError if s[i] is not a key of op_lookup, and IndexError if
    i is past the end of s.
    """
    print(level + "parse_op: " + s[i:], end=" ")
    op_string = s[i]
    print("consumed: " + op_string)
    return (op_lookup[op_string], i + 1)


def parse_num(s, i, level):
    """Parse the number (a run of digits and '.') starting at s[i].

    s     - string being parsed
    i     - index of the first character of the number in s
    level - indentation prefix printed before the trace output

    Returns (number, next_i), where number is a float and next_i is the
    index of the first character after the number (len(s) when the
    number runs to the end of the input).

    Raises ValueError if the characters consumed do not form a valid
    float (including when nothing numeric is found at s[i]).
    """
    print(level + "parse_num: " + s[i:], end=" ")
    # Find the end of the number.  Scanning with an explicit bound (rather
    # than relying on a for-loop's last index) makes j land one past the
    # final digit even when the number is the last thing in the input.
    j = i
    while j < len(s) and (s[j].isdigit() or s[j] == "."):
        j += 1
    token = s[i:j]
    print("consumed: " + token)
    return (float(token), j)  # returns the number and the next_i


def parse_expr(s, i, level):
    """Parse and evaluate the expression starting at s[i].

    An expression is either a number or "(expr op expr)" with no spaces.

    s     - string being parsed
    i     - index of the first character of the expression in s
    level - indentation prefix printed before the trace output; nested
            calls receive level + a tab so the trace looks like a tree

    Returns (value, next_i), where value is the result of evaluating the
    expression and next_i is the index to continue parsing from.
    """
    old_i = i  # for printing
    print(level + "parse_expr: " + s[i:])
    if s[i] != "(":  # it's a number
        return parse_num(s, i, level + "\t")
    i += 1  # drop the paren
    # notice no spaces in expression
    left_expression_result, i = parse_expr(s, i, level + "\t")
    operator, i = parse_op(s, i, level + "\t")
    right_expression_result, i = parse_expr(s, i, level + "\t")
    # notice that operator stores a function
    res = operator(left_expression_result, right_expression_result)
    print(level + "consumed: " + s[old_i:i + 1])
    return res, i + 1  # don't forget to consume the right paren


def parse(s):
    """Parse the expression string s from index 0 and print its value.

    The parsing functions print their trace as they run; this function
    prints the "Flattened tree" header and finally the computed value.
    """
    print("Flattened tree")
    print(parse_expr(s, 0, "")[0])


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print('Usage: ./arith_parse.py "expression"')
    else:
        parse(sys.argv[1])