
JavaScript: do not match a string surrounded with quotes

I'm writing a regular expression in JavaScript that replaces whitespace except when:

  1. Some specific syntax is in front of the whitespace
  2. It's surrounded by either single or double quotes (escaped quotes inside a quoted string do not end it)

Now, I've got a big part working. It matches all patterns that don't have the specific syntax in front of the whitespace; however, I'm stuck on the quote part.

return str.replace(/(function|new|return|var)?\s/g, function($0, $1) {
    return $1 ? $0 : '';

I've done quite some testing, but I just can't figure it out. Thanks in advance.

You can use:

// Sample input: two quoted strings (one double-quoted, one single-quoted with
// escaped quotes inside) separated by ordinary code.
var str = "foo  \"b a \\\" r\" new y 'l o l' foo lol; var x = new 'fo \\' o' ";

// Remove runs of whitespace that are outside quotes and not preceded by a keyword.
//
// The lookahead decides "outside quotes" by requiring that the rest of the line
// parses as:   unquoted* ( quoted-string unquoted* )* end-of-line
// where unquoted  = (?:[^\\"']|\\.)  (any non-quote char, or a backslash escape)
// and   quoted    = "(?:[^\\"]|\\.)*"  |  '(?:[^\\']|\\.)*'
// The trailing unquoted* must sit INSIDE the repeated group; otherwise only
// directly adjacent quoted strings are accepted and whitespace in front of two
// separated quoted strings is never matched.
// Only works when every quote is closed; comments and regex literals are not understood.
var result = str.replace(/(function|new|return|var)?\s+(?=(?:[^\\"']|\\.)*(?:(?:"(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*')(?:[^\\"']|\\.)*)*$)/gm,
function($0, $1) { return $1 ? $0 : ''; }); // keep "keyword + whitespace", drop bare whitespace

See http://jsfiddle.net/qCeC4/

Lookahead part in Perl /x form:


Note: As I said before, this is not a good way to parse JS, and will break on comments, regex quoting, and who knows what else.

Note2: Forgot to add that this only works for "valid" quoting, all quotes must be closed.

My suggestions:

  • mimic lookbehind in javascript (though this hack may not be perfect).

  • use a recursive descent parser (maybe antlr)?

  • Or manually write code to do it for you. Below is my first draft version of what I'm thinking (there's still some pseudo-code ):

/**
 * Removes whitespace characters from `str`, except whitespace that
 *   - directly follows one of the keywords function/new/return/var, or
 *   - lies inside a single- or double-quoted string.
 *
 * Inside a quoted string a backslash escapes the next character, so an escaped
 * quote does not end the string. An unmatched opening quote is treated as if it
 * were closed at the end of the input. Comments and regex literals are not
 * understood.
 *
 * @param {string} str - source text to compact
 * @returns {string} the text with the removable whitespace stripped
 */
function go(str) {
    // quotedRanges holds [start, end] index pairs (inclusive) within which
    // nothing should be changed because it's quoted.
    var quotedRanges = findQuotedRanges(str);

    return str.replace(/(function|new|return|var)?\s/g, function($0, $statement, offset) {
        // A match contains no quote characters, so it is either entirely inside
        // or entirely outside a quoted range; testing its start is sufficient.
        if (isQuoted(offset)) {
            return $0;
        }
        return $statement ? $0 : '';
    });

    // Scans the text once and records the index range of every quoted string.
    function findQuotedRanges(text) {
        var ranges = [];
        var i = 0;
        var quoteChar, quoteStart, ch;

        while (i < text.length) {
            ch = text.charAt(i);
            if (ch === '"' || ch === "'") {
                quoteChar = ch;
                quoteStart = i;
                i++;
                while (i < text.length && text.charAt(i) !== quoteChar) {
                    if (text.charAt(i) === '\\') {
                        i++; // skip the escaped character, e.g. \" or \'
                    }
                    i++;
                }
                // i is the closing quote, or past the end for an unmatched quote.
                ranges.push([quoteStart, Math.min(i, text.length)]);
            }
            i++;
        }
        return ranges;
    }

    // Tells whether the given index falls inside any recorded quoted range.
    function isQuoted(index) {
        return quotedRanges.some(function(range) {
            return index >= range[0] && index <= range[1];
        });
    }
}

// Expected behaviour of go(). The first argument is the test number that
// assert() reports on failure, so every number must be unique.
assert(1, go("") === "");
assert(2, go("function ") === "function ");
assert(3, go(" ") === "");
assert(4, go('" "') === '" "');
assert(5, go('" ') === '" '); // unmatched quote: treated as closed at end of input
assert(6, go('"x x"') === '"x x"');
assert(7, go('"new x"') === '"new x"');
assert(8, go(' "x x"') === '"x x"');
assert(9, go("' '") === "' '");
assert(10, go("' \\' '") === "' \\' '"); // escaped quote does not end the string

/**
 * Minimal test reporter: writes a failure line into the page when the
 * checked condition is falsy, and does nothing otherwise.
 *
 * @param {number} num - test number shown in the failure message
 * @param {*} statement - result of the checked condition
 */
function assert(num, statement) {
    if(!statement) {
        document.write('test #' + num + ' failed! <br/>');
    }
}




验证码 换一张
取 消

