How to simplify this method (splitting unquoted, unbracketed, unescaped commas)?
Curious if this can be simplified...
/// <summary>
/// Lazily splits <paramref name="str"/> on each <paramref name="sep"/> that is not
/// escaped by a backslash, not inside a quoted run, and not inside square brackets.
/// </summary>
/// <param name="str">The text to split; it is read by index, so it must not be null.</param>
/// <param name="sep">The separator character; a comma unless specified.</param>
/// <returns>
/// The segments in order of appearance. Separators are dropped; every other character
/// (quotes, backslashes, whitespace) is returned verbatim. At least one segment is
/// always produced, even for an empty string.
/// </returns>
/// <remarks>
/// A quoted run is opened by <c>"</c> or <c>'</c> and is closed only by the same character.
/// Bracket tracking is a single flag, so nested brackets are not counted: the first
/// unquoted, unescaped <c>]</c> ends the bracketed region.
/// </remarks>
internal static IEnumerable<string> Split(string str, char sep = ',')
{
    int segmentStart = 0;
    bool insideBrackets = false;
    bool pendingEscape = false;
    char openQuote = '\0'; // '\0' means "not inside a quoted run"

    for (int pos = 0; pos < str.Length; pos++)
    {
        char ch = str[pos];

        // A backslash toggles the escape flag; any other character consumes it.
        bool wasEscaped = pendingEscape;
        pendingEscape = ch == '\\' && !wasEscaped;

        // Escaped characters and the backslash itself never change any other state.
        if (wasEscaped || ch == '\\')
        {
            continue;
        }

        bool insideQuotes = openQuote != '\0';

        // The structural characters are matched before the separator, so a separator
        // equal to one of them is never treated as a split point.
        switch (ch)
        {
            case '[':
                if (!insideQuotes)
                {
                    insideBrackets = true;
                }
                break;

            case ']':
                if (!insideQuotes)
                {
                    insideBrackets = false;
                }
                break;

            case '"':
            case '\'':
                if (!insideQuotes)
                {
                    openQuote = ch;
                }
                else if (openQuote == ch)
                {
                    openQuote = '\0';
                }
                break;

            default:
                if (ch == sep && !insideQuotes && !insideBrackets)
                {
                    yield return str.Substring(segmentStart, pos - segmentStart);
                    segmentStart = pos + 1;
                }
                break;
        }
    }

    // Whatever follows the last separator (possibly nothing) is the final segment.
    yield return str.Substring(segmentStart);
}
I wrote this method to split on commas that are not inside [], are not quoted, and are not escaped. Is this inherently a tricky problem, or did I take a dumb approach?
Input:
foreach(var sel in SharpQuery.SplitCommas("\"comma, in quotes\", comma[in,brackets], comma[in \"quotes, and brackets\"], \"woah, 'nelly,' \\\"now you,re [talking, crazy\\\"\"")) {
Console.WriteLine(sel);
}
Expected output:
"comma, in quotes"
comma[in,brackets]
comma[in "quotes, and brackets"]
"woah, 'nelly,' \"now you,re [talking, crazy\""
Separate boolean flags are a bit of an awkward way to keep your automaton state. I would use a single variable or a stack in this case, so that your current state is always stateStack.Peek(). Easy to read. Easy to handle nested states.
Edit: here's a quick sample. I'm sure you can expand it to add error handling and the specifics of your rules.
/// <summary>
/// States of the splitter's pushdown automaton. The state on top of the stack is the
/// current one; leaving a state pops it and resumes the enclosing state.
/// </summary>
enum ParserState
{
    /// <summary>Outside any bracket or quote; the only state in which separators split.</summary>
    Text,

    /// <summary>Inside a <c>[...]</c> group; groups may nest.</summary>
    Bracketed,

    /// <summary>Inside a quoted run; brackets and separators are literal here.</summary>
    Quoted,

    /// <summary>Directly after a backslash; the next character is taken literally.</summary>
    EscapChar,
}

/// <summary>
/// Lazily splits <paramref name="str"/> on each <paramref name="sep"/> that is not
/// escaped by a backslash, not inside a quoted run, and not inside square brackets.
/// </summary>
/// <param name="str">The text to split; it is read by index, so it must not be null.</param>
/// <param name="sep">The separator character; a comma unless specified.</param>
/// <returns>
/// The segments in order of appearance. Separators are dropped; every other character
/// (quotes, backslashes, whitespace) is returned verbatim. At least one segment is
/// always produced, even for an empty string.
/// </returns>
/// <remarks>
/// A quoted run is opened by <c>"</c> or <c>'</c> and closed only by the same character;
/// inside it, brackets are ordinary text. Brackets nest. Backslash, quote and bracket
/// handling take precedence over the separator, so a separator equal to one of those
/// characters does not split where that handling applies.
/// </remarks>
internal static IEnumerable<string> Split(string str, char sep = ',')
{
    int lastIdx = 0;

    // The character that opened the current quoted run. One variable is enough because
    // nothing except an escape can be pushed on top of Quoted.
    char openQuote = '\0';

    var state = new Stack<ParserState>();
    state.Push(ParserState.Text);

    for (int i = 0; i < str.Length; i++)
    {
        char c = str[i];
        ParserState s = state.Peek();

        if (s == ParserState.EscapChar)
        {
            // The escaped character is consumed literally, whatever it is.
            state.Pop();
        }
        else if (c == '\\')
        {
            state.Push(ParserState.EscapChar);
        }
        else if (s == ParserState.Quoted)
        {
            // Inside quotes only the matching closing quote is significant; in
            // particular '[' must NOT open a bracket group here.
            if (c == openQuote)
            {
                state.Pop();
            }
        }
        else if (c == '"' || c == '\'')
        {
            openQuote = c;
            state.Push(ParserState.Quoted);
        }
        else if (c == '[')
        {
            state.Push(ParserState.Bracketed);
        }
        else if (s == ParserState.Bracketed && c == ']')
        {
            state.Pop();
        }
        else if (s == ParserState.Text && c == sep)
        {
            yield return str.Substring(lastIdx, i - lastIdx);
            lastIdx = i + 1;
        }
    }

    // Whatever follows the last separator (possibly nothing) is the final segment.
    yield return str.Substring(lastIdx);
}
精彩评论