Splitting a string only when the delimiter is not enclosed in quotation marks

2022-12-15 06:50 问答作者：

I need to write a split function in JavaScript that splits a string into an array, on a comma...but the comma must not be enclosed in quotation marks (' and ").

Here are three examples and how the result (an array) should be:

"peanut, butter, jelly"
  -> ["peanut", "butter", "jelly"]

"peanut, 'butter, bread', 'jelly'"
  -> ["peanut", "butter, bread", "jelly"]

'peanut, "butter, bread", "jelly"'
  -> ["peanut", 'butter, bread', "jelly"]

The reason I cannot use JavaScript's split method is because it also splits when the delimiter is enclosed in quotation marks.

How can I accomplish this, maybe with a regular expression ?

As regards the context, I will be using this to split the arguments passed from the third element of the third argument passed to the function you create when extending the jQuery's $.expr[':']. Normally, the name given to this parameter is called meta, which is an array that contains certain info about the filter.

Anyway, the third element of this array is a string that contains the parameters passed with the filter; and since the parameters are in string format, I need to be able to split them correctly for parsing.

What you are asking for is essentially a Javascript CSV parser. Do a Google search on "Javascript CSV Parser" and you'll get lots of hits, many with complete scripts. See also Javascript code to parse CSV data

Well, I already have a jackhammer of a solution written (general code written for something else), so just for kicks . . .

/**
 * Rule-based lexer: scans a string with a list of regular-expression rules
 * and turns every match into a lexeme.
 *
 * Optional constructor flags, accepted in any order:
 *   - Lexer.USE_NEW                 function lexemes are invoked with `new`.
 *   - Lexer.SET_INDEX               store each lexeme's position under the
 *                                   property named by Lexer.DEFAULT_INDEX.
 *   - Lexer.SET_INDEX (name) or
 *     new Lexer.SET_INDEX (name)    store the position under `name` instead.
 * Any other argument is ignored.
 */
function Lexer () {
  this.setIndex = false;
  this.useNew = false;
  for (var i = 0; i < arguments.length; ++i) {
    var arg = arguments [i];
    if (arg === Lexer.USE_NEW) {
      this.useNew = true;
    }
    else if (arg === Lexer.SET_INDEX) {
      // The bare function was passed as a flag: use the default property name.
      this.setIndex = Lexer.DEFAULT_INDEX;
    }
    else if (arg instanceof Lexer.SET_INDEX) {
      this.setIndex = arg.indexProp;
    }
  }
  this.rules = [];
  this.errorLexeme = null;
}

// Sentinel lexeme: lex () removes every occurrence of it from its result.
Lexer.NULL_LEXEME = {};

// Ready-made error marker for use with setErrorLexeme ().
Lexer.ERROR_LEXEME = {
  toString: function () {
    return "[object Lexer.ERROR_LEXEME]";
  }
};

// Property name used for positions when no explicit name is supplied.
Lexer.DEFAULT_INDEX = "index";

// Constructor flag: call function lexemes with `new`.
Lexer.USE_NEW = {};

/**
 * Constructor flag carrying the property name under which lex () records
 * each lexeme's position. Works with or without `new`.
 *
 * @param {string} [indexProp] property name; defaults to Lexer.DEFAULT_INDEX.
 */
Lexer.SET_INDEX = function (indexProp) {
  if ( !(this instanceof Lexer.SET_INDEX)) {
    // Called as a plain function: build the instance on the caller's behalf.
    return new Lexer.SET_INDEX (indexProp);
  }
  if (indexProp === undefined) {
    indexProp = Lexer.DEFAULT_INDEX;
  }
  this.indexProp = indexProp;
};

(function () {
  // Equivalent of `new ctor (args [0], args [1], ...)` for an argument array,
  // done with bind so that no code has to be generated at run time.
  function construct (ctor, args) {
    var Bound = Function.prototype.bind.apply (ctor, [null].concat (args));
    return new Bound ();
  }

  // RegExp boolean property -> flag character. Properties an engine does not
  // know read as undefined and are simply skipped by getFlags ().
  var flagMap = [
      ["global", "g"]
    , ["ignoreCase", "i"]
    , ["multiline", "m"]
    , ["sticky", "y"]
    , ["unicode", "u"]
    , ["dotAll", "s"]
    , ["hasIndices", "d"]
    , ["unicodeSets", "v"]
    ];

  // Rebuilds the flag string of `regex` from its boolean flag properties.
  function getFlags (regex) {
    var flags = "";
    for (var i = 0; i < flagMap.length; ++i) {
      if (regex [flagMap [i] [0]]) {
        flags += flagMap [i] [1];
      }
    }
    return flags;
  }

  // Returns a predicate that is true for every value other than `x`.
  function not (x) {
    return function (y) {
      return x !== y;
    };
  }

  // Pairs a regex with its lexeme. lex () positions the regex through
  // lastIndex, which only works on global regexes, so a non-global regex is
  // re-created with the "g" flag added and its other flags kept.
  function Rule (regex, lexeme) {
    if (!regex.global) {
      var flags = "g" + getFlags (regex);
      regex = new RegExp (regex.source, flags);
    }
    this.regex = regex;
    this.lexeme = lexeme;
  }

  Lexer.prototype = {
      constructor: Lexer

      /**
       * Appends a rule.
       * @param {RegExp} regex pattern to look for.
       * @param {*} lexeme value emitted for a match, or a function that
       *   computes it (see runLexeme).
       */
    , addRule: function (regex, lexeme) {
        var rule = new Rule (regex, lexeme);
        this.rules.push (rule);
      }

      /**
       * Sets the lexeme emitted for input that no rule matches. While it is
       * null (the default) such input is skipped silently.
       */
    , setErrorLexeme: function (lexeme) {
        this.errorLexeme = lexeme;
      }

      /**
       * Produces the lexeme for one match. Non-function lexemes are returned
       * unchanged; function lexemes receive (match, group1, ..., index, input)
       * and are invoked with `new` when the lexer was built with USE_NEW.
       */
    , runLexeme: function (lexeme, exec) {
        if (typeof lexeme !== "function") {
          return lexeme;
        }
        var args = exec.concat (exec.index, exec.input);
        if (this.useNew) {
          return construct (lexeme, args);
        }
        return lexeme.apply (null, args);
      }

      /**
       * Splits `str` into lexemes.
       *
       * At every scan position each rule is tried; the match that starts
       * earliest wins, ties go to the longest match and then to the rule
       * added first. Text skipped before a match, or left over when nothing
       * matches any more, yields the error lexeme if one is set (leftover
       * text also ends the scan).
       *
       * @param {string} str input text.
       * @returns {Array} the lexemes in input order, without NULL_LEXEME.
       */
    , lex: function (str) {
        var index = 0;
        var lexemes = [];
        var indexProp = this.setIndex;

        // Records a lexeme and, when position tracking is on, stores where it
        // starts. Primitives cannot carry properties, so only objects and
        // functions are tagged; NULL_LEXEME is discarded later anyway.
        var emit = function (lexeme, position) {
          var taggable = lexeme !== null
            && lexeme !== Lexer.NULL_LEXEME
            && (typeof lexeme === "object" || typeof lexeme === "function");
          if (indexProp && taggable) {
            lexeme [indexProp] = position;
          }
          lexemes.push (lexeme);
        };

        // A non-sticky rule that fails from `index` cannot match further on
        // either; only sticky rules make a retry one character later useful.
        var hasStickyRule = false;
        for (var r = 0; r < this.rules.length; ++r) {
          if (this.rules [r].regex.sticky) {
            hasStickyRule = true;
          }
        }

        while (index < str.length) {
          var bestExec = null;
          var bestRule = null;
          for (var i = 0; i < this.rules.length; ++i) {
            var rule = this.rules [i];
            rule.regex.lastIndex = index;
            var exec = rule.regex.exec (str);
            if (exec) {
              var doUpdate = !bestExec
                || (exec.index < bestExec.index)
                || (exec.index === bestExec.index && exec [0].length > bestExec [0].length)
                ;
              if (doUpdate) {
                bestExec = exec;
                bestRule = rule;
              }
            }
          }
          if (!bestExec) {
            if (this.errorLexeme) {
              emit (this.errorLexeme, index);
              break;
            }
            if (!hasStickyRule) {
              break;
            }
            ++index;
            continue;
          }
          if (this.errorLexeme && index !== bestExec.index) {
            // Unmatched text lies between the scan position and the match.
            emit (this.errorLexeme, index);
          }
          emit (this.runLexeme (bestRule.lexeme, bestExec), bestExec.index);
          index = bestExec.index + bestExec [0].length;
          if (bestExec [0].length === 0) {
            // A zero-length match consumes one character so the scan always
            // moves forward.
            ++index;
          }
        }
        return lexemes.filter (not (Lexer.NULL_LEXEME));
      }
  };
}) ();

// Fallback for engines without a native Array.prototype.filter; it is only
// installed when the method is missing.
if (!Array.prototype.filter) {
  /**
   * Returns a new array with the elements for which `fun` returns a truthy
   * value. Holes in sparse arrays are skipped.
   *
   * @param {Function} fun called as fun (value, index, array).
   * @param {*} [thisp] optional second argument, used as `this` for `fun`.
   * @throws {TypeError} if `this` is null/undefined or `fun` is not callable.
   */
  Array.prototype.filter = function (fun) {
    if (this === null || this === undefined) {
      throw new TypeError ("Array.prototype.filter called on null or undefined");
    }
    // Validate up front so that an empty array does not hide a bad callback.
    if (typeof fun !== "function") {
      throw new TypeError ("Array.prototype.filter: callback must be a function");
    }
    var source = Object (this);
    var len = source.length >>> 0;
    var res = [];
    var thisp = arguments [1];
    for (var i = 0; i < len; ++i) {
      if (i in source) {
        // Read the value before calling back, in case the callback mutates
        // the array.
        var val = source [i];
        if (fun.call (thisp, val, i, source)) {
          res.push (val);
        }
      }
    }
    return res;
  };
}

Now to use the code for your problem:

/**
 * Strips whitespace from both ends of a string.
 *
 * @param {string} str text to clean up.
 * @returns {string} `str` without leading or trailing whitespace.
 */
function trim (str) {
  // Leading run first, then the trailing run.
  return str.replace (/^\s+/, "").replace (/\s+$/, "");
}

// Comma splitter built on Lexer: quoted sections keep their commas, and any
// text that no rule recognises is reported as Lexer.ERROR_LEXEME.
var splitter = new Lexer ();
splitter.setErrorLexeme (Lexer.ERROR_LEXEME);

// Rules, in the order they are registered:
//   1. a token containing a "double-quoted" section
//   2. a token containing a 'single-quoted' section
//   3. a run of characters free of commas and quotes
//   4. the comma itself, which produces no output
// Tokens from rules 1-3 are passed through trim ().
[
    [/[^,"]*"[^"]*"[^,"]*/g, trim]
  , [/[^,']*'[^']*'[^,']*/g, trim]
  , [/[^,"']+/g, trim]
  , [/,/g, Lexer.NULL_LEXEME]
].forEach (function (rule) {
  splitter.addRule (rule [0], rule [1]);
});

// The three sample inputs from the question.
var strs = [
    "peanut, butter, jelly"
  , "peanut, 'butter, bread', 'jelly'"
  , 'peanut, "butter, bread", "jelly"'
  ];

// NOTE: Array.prototype.map and forEach are not available in every browser.
// lex () only looks at its first argument, so it can be handed to map
// directly, with splitter supplied as `this`.
var splitStrs = strs.map (splitter.lex, splitter);

// Sample input: comma-separated words, one of them quoted with a comma inside.
var str = 'text, foo, "haha, dude", bar';

// Collect every token: either a run of lowercase letters, or a quoted section
// that ends at the next quote of the same kind. Yields null when nothing
// matches, exactly like String.prototype.match with a global regex.
var fragments = (function (input, tokenPattern) {
  var found = [];
  var hit;
  while ((hit = tokenPattern.exec (input)) !== null) {
    found.push (hit [0]);
  }
  return found.length > 0 ? found : null;
}) (str, /[a-z]+|(['"]).*?\1/g);

Even better (supports escaped " or ' inside the strings):

// Sample input. The backslash is doubled in the literal so that the string
// really contains an escaped quote (\") inside the double-quoted section.
var str = 'text_123 space, foo, "text, here\\", dude", bar, \'one, two\', blob';

// Alternatives, tried in this order at each position:
//   1. a "double-quoted" section in which \x escapes any character x
//   2. a 'single-quoted' section with the same escape rule
//   3. an unquoted token of any length: it starts and ends on a character
//      that is not a quote, comma or whitespace, and may contain inner spaces
// Each quote kind gets its own alternative because a backreference cannot be
// used inside a character class.
var fragments = str.match(/"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|[^"',\s](?:[^"',]*[^"',\s])?/g);

// Result:
0: text_123 space
1: foo
2: "text, here\", dude"
3: bar
4: 'one, two'
5: blob

If you can control the input to enforce that the string will be enclosed in double-quotes " and that all elements within the string will be enclosed in single-quotes ', and that no element can CONTAIN a single-quote, then you can split on , '. If you CAN'T control the input, then using a regular expression to sort/filter/split the input would be about as useful as using a regular expression to match against xhtml (see: RegEx match open tags except XHTML self-contained tags)

继续阅读：javascript regex

Splitting a string only when the delimiter is not enclosed in quotation marks

更多精彩内容

精彩评论

最新问答

央视是哪个频道？

请问买过的朋友，舒提啦旅行箱实际使用体验如何？？

检查不孕不育需要的费用？

海信ULED电视画质有什么不同的地方?？

钉子可以挂的住画框幕布吗？

问答排行榜

王昌瑞《潜梦追凶》剧组庆生新锐演员未来可期？

Is it allowed to ask users to enter credit card details for own payment method?

Escaping "<" in Perl-generated XML

imessage会显示已读吗？

微信重新建群怎么建？