开发者

Parsing with C++

What's the easiest way to parse a file with

[[1, 2, 3, 4], [8, 9, 10, 11], ...]

into a vector of QRectF (a struct with four floats)?


Have you looked at Boost's Spirit library? I think it's an amazing library, and if I recall correctly the tutorial has examples very similar to what you want.

EDIT: This puts you at about the right place: http://www.boost.org/doc/libs/release/libs/spirit/doc/html/spirit/qi/tutorials/warming_up.html

EDIT: Sigh...I haven't looked at c++ in over a year (and I haven't looked at spirit in over 4 years) so this took about an hour to put together. Here's the working example:

# include <boost/spirit/include/qi.hpp>
using boost::spirit::qi::int_;
using boost::spirit::qi::char_;
using boost::spirit::qi::lit;
using boost::spirit::ascii::space;
using boost::spirit::ascii::space_type;
using boost::spirit::qi::rule;
using boost::spirit::qi::phrase_parse;

#include <boost/fusion/include/adapt_struct.hpp>

#include <string>
using std::string;

#include <iostream>
using std::cout;
using std::endl;

#include <vector>
using std::vector;

// Plain aggregate of four ints; used below as the attribute type of the
// holder_p rule, i.e. one "[n1, n2, n3, n4]" group.
struct holder {
    int n1;
    int n2;
    int n3;
    int n4;
};
// Adapts holder as a Boost.Fusion sequence. The members are listed in
// declaration order, which matches the order of the four int_ parsers in
// holder_p.
BOOST_FUSION_ADAPT_STRUCT(::holder, (int,n1) (int,n2) (int, n3) (int, n4))

int main() {
    string s = "[[1,2,3,4], [4,3,2,1]]";
    vector<holder> v;

    // I admit it, I was wrong. It's 3 lines of parsing.
    rule<string::iterator, holder(), space_type> holder_p = 
        lit("[") >> int_ >> ',' >> int_ >> ',' >> int_ >> ',' >> int_ >> ']';
    rule<string::iterator, vector<holder>(), space_type > holder_list_p = 
        char('[') >> (holder_p % ',') >> ']';
    bool r = phrase_parse(s.begin(), s.end(), holder_list_p, space, v);

    if (r) {
        for (vector<holder>::const_iterator it = v.begin(); it != v.end(); it++) {
            cout << "n1: " << it->n1 << ", n2: " << it->n2 << 
                    ", n3: " << it->n3 << ", n4: " << it->n4 << endl;
        }
    }
    return 0;
}


IMO, Spirit is probably a bit of overkill for the job at hand. The format is simple enough that you can handle it pretty easily with the standard library. The real question is whether you need to check that the data is in the right format (verify the presence of all the brackets and commas) or if you just want to read the numbers. If you need to verify the format, you could do something like this:

// warning: untested code.
// Reports whether the two characters are equal.
bool verify(char a, char b) {
    const bool same = (a == b);
    return same;
}

// Extracts one rectangle written as "[a, b, c, d]" and validates the
// punctuation. A single ',' following the closing bracket is consumed when
// present, so consecutive elements of a list can be read back to back; it is
// optional because the last element of a list is not followed by one.
//
// On any mismatch or extraction error failbit is set on the stream and `r`
// is left untouched. Returns `is`.
std::istream &operator>>(std::istream &is, QRectF &r) { 
    float num[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    bool good = true;

    // The punctuation variables start as '\0': a failed extraction leaves its
    // target unmodified, and an indeterminate char must never be compared.
    char bracket = '\0';
    is >> bracket;
    good &= verify(bracket, '[');

    for (int i = 0; i < 3; i++) {
        char comma = '\0';
        is >> num[i] >> comma;
        good &= verify(comma, ',');
    }

    bracket = '\0';
    is >> num[3] >> bracket;
    good &= verify(bracket, ']');

    if (is.fail() || !good) {
        // setstate() changes the error state; setf() only takes format flags.
        is.setstate(std::ios::failbit);
        return is;
    }

    // Skip to the next significant character. Hitting end of input here sets
    // eofbit only, and peek() is skipped in that case so the successful read
    // is not turned into a failure.
    is >> std::ws;
    if (is.good() && is.peek() == ',')
        is.get();

    for (int i = 0; i < 4; i++)
        r.value[i] = num[i];
    return is;
}

If you don't care about verifying the format, you can do pretty much the same, but get rid of all the verify stuff, as well as setting the stream state.

Alternatively, you can create a locale that treats everything except numbers as whitespace, so reading the data becomes trivial:

// warning: untested code:
// ctype facet whose classification table marks every character as whitespace
// except '0'..'9', which are marked as digits. A stream imbued with a locale
// holding this facet therefore skips everything but digits when it skips
// whitespace before a formatted extraction.
struct digits_only: std::ctype<char> {
    digits_only(): std::ctype<char>(get_table()) {}

    // Returns a pointer to the classification table. The table has static
    // storage duration, so the pointer stays valid for the facet's lifetime.
    static std::ctype_base::mask const* get_table() {
        static std::vector<std::ctype_base::mask> 
            rc(std::ctype<char>::table_size,std::ctype_base::space);

        // std::fill takes a half-open range, so the end must be one past '9';
        // stopping at '9' itself would leave '9' classified as whitespace.
        std::fill(rc.begin() + '0', rc.begin() + '9' + 1, std::ctype_base::digit);
        return &rc[0];
    }
};

// Reads four consecutive values from the stream into r.value[0..3], in order,
// and returns the stream.
std::istream &operator>>(std::istream &is, QRectF &r) { 
    for (int i = 0; i < 4; ++i)
        is >> r.value[i];
    return is;
}

// Reads every QRectF from "myfile.txt" into a vector, using the digits_only
// facet so that the stream skips everything except digits.
// Returns 0 on success, 1 if the file cannot be opened.
int main() { 
    std::ifstream infile("myfile.txt");
    if (!infile)
        return 1;

    // The locale takes ownership of the facet (it is constructed with the
    // default reference count of 0), so it is not deleted here.
    infile.imbue(std::locale(std::locale(), new digits_only()));

    std::vector<QRectF> rects;
    std::copy(std::istream_iterator<QRectF>(infile),
              std::istream_iterator<QRectF>(),
              std::back_inserter(rects));
    return 0;
}


A very simple and straightforward approach is to let good old scanf deal with the formatted input:

/* Illustrative fragment: reads one "[a, b, c, d]" group with fscanf. */
QRectF rectf;
// deal with the opening square bracket here.
int r = fscanf (file, "[%f, %f, %f, %f]", 
                &rectf.a, &rectf.b,
                &rectf.c, &rectf.d);
/* fscanf returns the number of items assigned; 4 means all fields were read. */
if (r == 4)
    // parsing succeeded.
// consume whitespace and a comma/closing square bracket here.

A complete C program that could read and parse an input file into QRectF objects is given below:

#include <stdio.h>
#include <ctype.h>

/* Four floats filled, in order a, b, c, d, from one "[a, b, c, d]" group
   of the input file. */
typedef struct 
{
  float a;
  float b;
  float c;
  float d;
} QRectF;

static void skip_whitespaces (FILE*);

/* Reads a file of the form "[[a, b, c, d], [e, f, g, h], ...]" and prints
   each group of four floats on its own line.

   argv[1] is the name of the file to read.

   Returns 0 when the whole list was parsed, 1 on a usage error, when the
   file cannot be opened, or when the data is malformed.  */
int
main (int argc, char** argv)
{ 
  FILE* file = NULL; 
  int status = 0;
  int c;

  if (argc <= 1)
    {
      printf ("please specify file name.\n");
      return 1;
    }

  file = fopen (argv[1], "r");

  if (!file)
    {
      printf ("failed to open file.\n");
      return 1;
    }
  if (fgetc (file) != '[')
    {
      printf ("expected [ not found.\n");
      fclose (file);
      return 1;
    }

  /* An empty list "[]" is valid: accept the closing bracket straight away.
     Otherwise push the character back for the element parser below
     (ungetc of EOF is a no-op).  */
  skip_whitespaces (file);
  c = fgetc (file);
  if (c == ']')
    {
      fclose (file);
      return 0;
    }
  ungetc (c, file);

  /* The loop is left only through the breaks below; end of file shows up as
     a failed fscanf or a failed separator read, so no feof test is needed.  */
  for (;;)
    {
      QRectF rectf;
      int r;

      skip_whitespaces (file);
      r = fscanf (file, "[%f, %f, %f, %f]", 
                  &rectf.a, &rectf.b, &rectf.c, &rectf.d);
      /* fscanf returns the number of fields assigned (or EOF).  */
      if (r != 4)
        {
          printf ("invalid data format.\n");
          status = 1;
          break;
        }
      printf ("%f, %f, %f, %f\n", rectf.a, rectf.b, rectf.c, rectf.d);

      /* After an element comes either ',' (more elements) or ']' (end).  */
      skip_whitespaces (file);  
      c = fgetc (file);
      if (c == ']')
        break;
      if (c != ',')
        {
          printf ("expected , not found.\n");
          status = 1;
          break;
        }
    }

  fclose (file);
  return status;
}

/* Consumes characters from FILE while they are whitespace.  The first
   non-whitespace character is pushed back so the next read sees it.
   Does nothing when the end-of-file indicator is already set.  */
static void 
skip_whitespaces (FILE* file)
{
  for (;;)
    {
      int ch;

      if (feof (file))
        return;

      ch = fgetc (file);
      if (isspace (ch))
        continue;

      /* Not whitespace (possibly EOF, for which ungetc is a no-op).  */
      ungetc (ch, file);
      return;
    }
}
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜