开发者

Generate syntax tree for simple math operations

I am trying to generate a syntax tree, for a given string with simple math operators (+, -, *, /, and parenthesis). Given the string "1 + 2 * 3":

Generate syntax tree for simple math operations

It should return an array like this:

["+",
 [1,
  ["*",
   [2,3]
  ]
 ]
]

I made a function to transform "1 + 2 * 3" in [1,"+",2,"*",3].

The problem is: I have no idea to give priority to certain operations.

My code is:

function isNumber(ch){
    switch (ch) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
            return true;
        break;
        default:
            return false;
            break;
    }
}

function generateSyntaxTree(text){
    if (typeof text != 'string') return [];
    var code = text.replace(new RegExp("[ \t\r\n\v\f]", "gm"), "");
    var codeArray = [];
    var syntaxTree = [];

    // Put it in its on scope
    (function(){
        var lastPos = 0;
        var wasNum = false;
        for (var i = 0; i < code.length; i++) {
            var cChar = code[i];
            if (isNumber(cChar)) {
                if (!wasNum) {
                    if (i != 0) {
                        codeArray.push(code.slice(lastPos, i));
                    }
                    lastPos = i;
                    wasNum = true;
                }
            } else {
                if (wasNum) {
                    var n = Number(code.slice(lastPos, i));
                    if (isNaN(n)) {
                        throw new Error("Invalid Number");
         开发者_StackOverflow社区               return [];
                    } else {
                        codeArray.push(n);
                    }
                    wasNum = false;
                    lastPos = i;
                }
            }
        }
        if (wasNum) {
            var n = Number(code.slice(lastPos, code.length));
            if (isNaN(n)) {
                throw new Error("Invalid Number");
                return [];
            } else {
                codeArray.push(n);
            }
        }
    })();

    // At this moment, codeArray = [1,"+",2,"*",3]

    return syntaxTree;
}

alert('Returned: ' + generateSyntaxTree("1 + 2 * 3"));


The way to do a top down parser, if not using FLEX/BISON or any other similar package is to first write a tokenizer that can parse input and serve tokens.

Basically you need a tokenizer that provides getNextToken, peekNextToken and skipNextToken.

Then you work your way down using this structure.

// parser.js
var input, currToken, pos;

var TOK_OPERATOR = 1;
var TOK_NUMBER = 2;
var TOK_EOF = 3;

function nextToken() {
  var c, tok = {};

  while(pos < input.length) {
    c = input.charAt(pos++);
    switch(c) {
      case '+':
      case '-':
      case '*':
      case '/':
      case '(':
      case ')':
    tok.op = c;
    tok.type = TOK_OPERATOR;
    return tok;

      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9':
    tok.value = c;
    tok.type = TOK_NUMBER;
    return tok;

      default:
    throw "Unexpected character: " + c;
    }
  }
  tok.type = TOK_EOF;
  return tok;
}

function getNextToken() {
  var ret;

  if(currToken)
    ret = currToken;
  else
    ret = nextToken();

  currToken = undefined;

  return ret;
}

function peekNextToken() {
  if(!currToken)
    currToken = nextToken();

  return currToken;
}

function skipNextToken() {
  if(!currToken)
    currToken = nextToken();
  currToken = undefined;
}

function parseString(str) {
  input = str;
  pos = 0;

  return expression();
}


function expression() {
  return additiveExpression();
}

function additiveExpression() {
  var left = multiplicativeExpression();
    var tok = peekNextToken();
    while(tok.type == TOK_OPERATOR && (tok.op == '+' || tok.op == '-') ) {
        skipNextToken();
        var node = {};
        node.op = tok.op;
        node.left = left;
        node.right = multiplicativeExpression();
        left = node;
    tok = peekNextToken();
    }
    return left;
}

function multiplicativeExpression() {
  var left = primaryExpression();
    var tok = peekNextToken();
    while(tok.type == TOK_OPERATOR &&  (tok.op == '*' || tok.op == '/') ) {
        skipNextToken();
        var node = {};
        node.op = tok.op;
        node.left = left;
        node.right = primaryExpression();
        left = node;
    tok = peekNextToken();
    }
    return left;
}

function primaryExpression() {
  var tok = peekNextToken();
  if(tok.type == TOK_NUMBER) {
    skipNextToken();
    node = {};
    node.value = tok.value;
    return node;
  }
  else
  if(tok.type == TOK_OPERATOR && tok.op == '(') {
    skipNextToken();
    var node = expression(); // The beauty of recursion
    tok = getNextToken();
    if(tok.type != TOK_OPERATOR || tok.op != ')')
      throw "Error ) expected";
    return node    
  }
  else
    throw "Error " + tok + " not exptected";
}

As you can see, you start by requesting the least privileged operation, which requires the next higher privileged operation as its left and right term and so on. Unary operators has a little different structure. The neat thing is the recursion at the end when a parenthesis is encountered.

Here is a demo page that uses the parser and renders the parse-tree (had the code for it laying around...)

<html>
<head>
<title>tree</title>
<script src="parser.js"></script>
</head>

<body onload="testParser()">

<script>

function createTreeNode(x, y, val, color) {
  var node = document.createElement("div");
  node.style.position = "absolute";
  node.style.left = "" + x;
  node.style.top = "" + y;

  node.style.border= "solid";
  node.style.borderWidth= 1;
  node.style.backgroundColor= color;

  node.appendChild(document.createTextNode(val));

  return node;
};

var yStep = 24;
var width = 800;
var height = 600;

var RED = "#ffc0c0";
var BLUE = "#c0c0ff";

container = document.createElement("div");
container.style.width = width;
container.style.height = height;
container.style.border = "solid";

document.body.appendChild(container);

var svgNS = "http://www.w3.org/2000/svg";

function renderLink(x1, y1, x2, y2)
{
  var left = Math.min(x1,x2);
  var top = Math.min(y1,y2);

  var width = 1+Math.abs(x2-x1);
  var height = 1+Math.abs(y2-y1);

  var svg = document.createElementNS(svgNS, "svg");
  svg.setAttribute("x", left);
  svg.setAttribute("y",  top);
  svg.setAttribute("width", width );
  svg.setAttribute("height", height );

  var line = document.createElementNS(svgNS,"line");

  line.setAttribute("x1", (x1 - left) );
  line.setAttribute("x2", (x2 - left) );
  line.setAttribute("y1", (y1 - top) );
  line.setAttribute("y2", (y2 - top) );
  line.setAttribute("stroke-width",  "1");
  line.setAttribute("stroke",  "black");
  svg.appendChild(line);

  var div = document.createElement("div");
  div.style.position = "absolute";
  div.style.left = left;
  div.style.top = top;
  div.style.width = width;
  div.style.height = height;

  div.appendChild(svg);
  container.appendChild(div);  
}

function getHeight(dom) {
    var h = dom.offsetHeight;
    return h;
}

function getWidth(dom) {
    var w = dom.offsetWidth;
    return w;
}

function renderTree(x, y, node, width, height)
{
    if(height < 1.5*yStep)
    height = 1.5*yStep;

    var val;
    if(node.op) {
      val = node.op;
      color = BLUE;
    }
    else
      if(node.value) {
    val = node.value;
    color = RED;
      }
      else
    val = "?";

    var dom = createTreeNode(x, y, val, color);
    container.appendChild(dom);

    var w = getWidth(dom);
    var h = getHeight(dom);

    var nx, ny;

    var child;

    if(node.left) {
    nx = x - width/2;
    ny = y+height;
    var child = renderTree(nx, ny, node.left, width/2, height/2);
        renderLink(x+w/2, y+h, nx+getWidth(child)/2, ny);
    }

    if(node.right) {
    nx = x + width/2;
    ny = y+height;

    child = renderTree(nx, ny, node.right, width/2, height/2);
        renderLink(x+w/2, y+h, nx+getWidth(child)/2, ny);
    }
    return dom;
}

var root;

function testParser()
{
  var str = "1+2*5-5*(9+2)";

  var exp = document.createElement("div");
  exp.appendChild(document.createTextNode(str));
  container.appendChild(exp);
  var tree = parseString(str);
  renderTree(width/2, 20, tree, width/2, 4*yStep);
}

</script>

</body>
</html>


The thing to do is to use a parser generator like flex or ANTLR (searching at google will find one for your language).

But if you are doing this for fun or to learn how parsers work, look up wikipedia for recursive descent parser.

A simple recursive descent parser can be easily made for simple expressions like this. You can define the grammar as:

<expression> ::= <term> | <term> <add_op> <expression>
<term> ::= <factor> | <factor> <mul_op> <term>
<factor> ::= ( <expression> ) | <number> 
<add_op> ::= + | -
<mul_op> ::= * | /

Notice that by making the rule for <term> contain the rule for <factor> this grammar makes sure all multiplication/division operations occur lower in the parse tree than any addition/subtraction. This ensures those operations are evaluated first.


Have you read up on the theory behind parsers? Wikipedia (as always) has some good articles to read:

  • LR parser
  • Recursive descent parser


I built a fun little calculator once and had the same problem as you, which I solved by building the syntax tree without keeping the order precedence in mind,firstly. Each node has a precedence value, and when eval'ing non-constants, I'd check the left node: if it has lower precedence, I'd rotate the tree clockwise: bring it into evaluation and evaluate that first, likewise for the right node. then I'd just try to evaluate again. It seemed to work well enough for me.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜