开发者

Parsing problem H.264 sequence parameter set data

I need to parse the image size from raw H.264 data. The following test code works for the cases I tested, but now it fails on "Z0IAIOKQCgDLYC3AQEBpB4kRUA==". The result should be 1280x800, but it gives me 640x1616:

/** Feeds each sample string, converted with StringUtils.fromBase64, to decode in order. */
public static void main(String[] args) {
    String[] samples = {
        "Z0IAKeKQCgDLYC3AQEBpB4kRUA==",
        "Z0IAIOKQCgDLYC3AQEBpB4kRUA==",
        "Z0IAHuNQFAe2AtwEBAaQeJEV"
    };
    for (String sample : samples) {
        decode(StringUtils.fromBase64(sample));
    }
}

// Index of the next bit to read from data; bit 0 is the most significant bit of data[0].
static int pos;
// Buffer the bit readers (getBit / getU / ev) consume.
static byte[] data;

/**
 * Dumps {@code data} as hex and binary, then reads sequence-parameter-set fields
 * starting at bit 0 of the buffer and prints the derived picture size as
 * "width x height". Any exception raised while reading is printed to standard output.
 *
 * NOTE(review): parsing starts at data[0]; the sample buffers begin with 0x67, which
 * looks like a NAL unit header rather than profile_idc - confirm before relying on
 * the printed size.
 *
 * @param data raw bytes to parse
 */
private static void decode(byte[] data) {
    try {
        System.out.println();
        System.out.println(StringUtils.toHex(data));
        System.out.println(StringUtils.toBin(data, 0, data.length, true));
        System.out.println();
        // Reset the shared bit reader; the parameter shadows the static field,
        // so the field is assigned through the class name.
        pos = 0;
        T.data = data;
        int profile_idc = getU(8);
        int constraint_set0_flag = getU(1);
        int constraint_set1_flag = getU(1);
        int constraint_set2_flag = getU(1);
        int constraint_set3_flag = getU(1);
        int reserved_zero_4bits = getU(4);
        int level_idc = getU(8);
        int seq_parameter_set_id = uev();
        int log2_max_frame_num_minus4 = uev();
        int pict_order_cnt_type = uev();
        System.out.println("pict_order_cnt_type=" + pict_order_cnt_type);
        // The fields that follow depend on the picture-order-count type; they are
        // read only to advance the bit position.
        if (pict_order_cnt_type == 0) {
            uev();
        } else if (pict_order_cnt_type == 1) {
            getU(1);
            sev();
            sev();
            int n = uev();
            System.out.println("n*sev, n=" + n);
            for (int i = 0; i < n; i++)
                sev();
        }
        int num_ref_frames = uev();
        getU(1);
        // Both dimensions are coded in 16-pixel units, minus one.
        int pic_width = (uev() + 1) * 16;
        int pic_height = (uev() + 1) * 16;
        int frame_mbs_only_flag = getU(1);
        System.out.println(pic_width + " x " + pic_height);
    } catch (Exception e) {
        e.printStackTrace(System.out);
    }
}

/**
 * Reads one Exp-Golomb code from the shared bit reader and prints a trace line
 * showing the decoded value and the consumed bits ("zeros/1info").
 *
 * @param signed when true the code number k is mapped to the signed sequence
 *               0, 1, -1, 2, -2, ...; when false k itself is returned
 * @return the decoded value
 */
private static int ev(boolean signed) {
    StringBuilder trace = new StringBuilder();

    // Count the leading zero bits; the loop also consumes the terminating 1 bit.
    int leadingZeros = 0;
    while (getBit() == 0) {
        leadingZeros++;
        trace.append('0');
    }
    trace.append("/1");

    // Rebuild "1 followed by the info bits", then subtract one to get the code number.
    int codeNum = 1;
    for (int remaining = leadingZeros; remaining > 0; remaining--) {
        int bit = getBit();
        trace.append(bit);
        codeNum = (codeNum << 1) | bit;
    }
    codeNum -= 1;

    if (!signed) {
        System.out.println("getUe(v) = " + (codeNum) + " " + trace);
        return codeNum;
    }

    // Odd code numbers are positive, even ones negative; the magnitude is ceil(k / 2).
    int magnitude = (codeNum + 1) / 2;
    int value = (codeNum % 2 == 0) ? -magnitude : magnitude;
    System.out.println("getSe(v) = " + (value) + " " + trace);
    return value;
}

/** Reads one unsigned Exp-Golomb value by delegating to {@code ev(false)}. */
private static int uev() {
    return ev(false);
}

/** Reads one signed Exp-Golomb value by delegating to {@code ev(true)}. */
private static int sev() {
    return ev(true);
}

/**
 * Reads {@code bits} bits from the shared bit reader, most significant bit first,
 * prints the value with a "getU(n) = " prefix and returns it.
 *
 * @param bits number of bits to read
 * @return the bits assembled into an int
 */
private static int getU(int bits) {
    int value = 0;
    int remaining = bits;
    while (remaining-- > 0) {
        value = (value << 1) | getBit();
    }
    System.out.println("getU(" + bits + ") = " + value);
    return value;
}

/**
 * Returns the bit at position {@code pos} of {@code data} (bit 0 is the most
 * significant bit of byte 0) and advances {@code pos} by one.
 */
private static int getBit() {
    int shift = 7 - (pos & 7);   // distance of the wanted bit from the byte's LSB
    int byteIndex = pos >> 3;
    pos++;                       // advance before touching the array, as the caller's state expects
    return (data[byteIndex] >> shift) & 1;
}

Output for failing case:

67420020E2900A00CB602DC040406907891150
01100111 01000010 00000000 00100000 11100010 10010000 00001010 00000000 11001011 01100000 00101101 11000000 01000000 01000000 01101001 00000111 10001001 00010001 01010000 

getU(8) = 103
getU(1) = 0
getU(1) = 1
getU(1) = 0
getU(1) = 0
getU(4) = 2   << shouldn't this be 0 ? is same with correct parsing!
getU(8) = 0
getUe(v) = 3 00/100
getUe(v) = 13 000/1110   << log2_max_frame_num_minus4 shall be 0-12, inclusive.
getUe(v) = 4 00/101    
pict_order_cnt_type=4    << pic_order_cnt_type shall be 0-2, inclusive.
getUe(v) = 3 00/100       
getU(1) = 0
getUe(v) = 39 00000/101000
getUe(v) = 100 000000/1100101
getU(1) = 1
640 x 1616


Perhaps not what you asked for, but have a look at the reference implementation to see how it does the parsing...

  1. Download h.264 parser from http://www.w6rz.net/h264_parse.zip (from this thread @ doom9 http://forum.doom9.org/archive/index.php/t-133070.html)

  2. Download the H.264 reference SW from http://iphome.hhi.de/suehring/tml/

This should get you started. By the way, the byte stream format is described in Annex B of the specification. Download it from the ITU: http://www.itu.int/rec/T-REC-H.264-201003-I/en


There is indeed a bug in the code (actually there are three).

  • The first bug is that the first byte is not skipped: it is the NAL unit header (containing forbidden_zero_bit (1 bit), nal_ref_idc (2 bits) and nal_unit_type (5 bits; the value should be 7 to indicate an SPS unit)), not part of the SPS data.
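  • The second bug is that there are a total of 6 constraint_set#_flags (constraint_set0_flag to constraint_set5_flag) instead of the 4 the code reads (the two extra flags take two of the four bits the code treats as reserved, see bug 3).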
  • The third bug is that the number of reserved_zero_#bits is 2 in the current spec. I guess this might have been different in the past but I'm not sure about that.

So I've made some alterations to the decode method and came up with this version which works for me (btw my class is called H264Parser hence the statement H264Parser.data = data;):

/**
 * Dumps a NAL unit as hex and binary, parses it as a sequence parameter set (SPS)
 * and prints the displayed luma picture size as "width x height".
 *
 * <p>The buffer must start with the one-byte NAL unit header, followed by the SPS
 * payload. Only profiles whose SPS has no chroma_format_idc / scaling-matrix block
 * are handled, so 4:2:0 chroma is assumed when converting crop offsets to pixels.
 * Emulation-prevention bytes are not removed. Any exception raised while reading
 * is printed to standard output.
 *
 * @param data raw NAL unit bytes, NAL header first
 */
private static void decode(byte[] data) {
    try {
        System.out.println();
        System.out.println(StringUtils.toHex(data));
        System.out.println(StringUtils.toBin(data, 0, data.length, true));
        System.out.println();
        // Reset the shared bit reader; the parameter shadows the static field,
        // so the field is assigned through the class name.
        pos = 0;
        H264Parser.data = data;

        // NAL unit header (one byte).
        int forbidden_zero_bit = getU(1);
        System.out.println("forbidden_zero_bit " + forbidden_zero_bit);
        int nal_ref_idc = getU(2);
        int nal_unit_type = getU(5);
        System.out.println("nal_unit_type (should be 7 for SPS) " + nal_unit_type);
        //END of NAL_header

        //Start of SPS data
        int profile_idc = getU(8);
        int constraint_set0_flag = getU(1);
        int constraint_set1_flag = getU(1);
        int constraint_set2_flag = getU(1);
        int constraint_set3_flag = getU(1);
        int constraint_set4_flag = getU(1);
        int constraint_set5_flag = getU(1);
        //The current version of the spec states that there are two reserved bits
        int reserved_zero_2bits = getU(2);
        System.out.println("reserved_zero_2bits" + reserved_zero_2bits);
        int level_idc = getU(8);
        int seq_parameter_set_id = uev();
        int log2_max_frame_num_minus4 = uev();
        int pict_order_cnt_type = uev();
        System.out.println("pict_order_cnt_type=" + pict_order_cnt_type);
        if (pict_order_cnt_type == 0) {
            uev(); // log2_max_pic_order_cnt_lsb_minus4
        } else if (pict_order_cnt_type == 1) {
            getU(1); // delta_pic_order_always_zero_flag
            sev();   // offset_for_non_ref_pic
            sev();   // offset_for_top_to_bottom_field
            int n = uev(); // num_ref_frames_in_pic_order_cnt_cycle
            System.out.println("n*sev, n=" + n);
            for (int i = 0; i < n; i++) {
                sev(); // offset_for_ref_frame[i]
            }
        }
        int num_ref_frames = uev();
        getU(1); // gaps_in_frame_num_value_allowed_flag
        int pic_width_in_mbs_minus1 = uev();
        int pic_height_in_map_units_minus1 = uev();
        int frame_mbs_only_flag = getU(1);
        if (frame_mbs_only_flag == 0) {
            getU(1); // mb_adaptive_frame_field_flag
        }
        getU(1); // direct_8x8_inference_flag

        // Optional cropping rectangle; the offsets stay 0 when the flag is not set.
        int frame_crop_left_offset = 0;
        int frame_crop_right_offset = 0;
        int frame_crop_top_offset = 0;
        int frame_crop_bottom_offset = 0;
        int frame_cropping_flag = getU(1);
        if (frame_cropping_flag == 1) {
            frame_crop_left_offset = uev();
            frame_crop_right_offset = uev();
            frame_crop_top_offset = uev();
            frame_crop_bottom_offset = uev();
        }

        // A map unit is one macroblock row when only frames are coded and a
        // macroblock-pair row otherwise, so the frame height doubles in that case.
        int map_unit_rows = 2 - frame_mbs_only_flag;
        // With 4:2:0 chroma one crop unit is 2 luma samples horizontally and
        // 2 * map_unit_rows luma samples vertically.
        int pic_width = (pic_width_in_mbs_minus1 + 1) * 16
                - 2 * (frame_crop_left_offset + frame_crop_right_offset);
        int pic_height = map_unit_rows * ((pic_height_in_map_units_minus1 + 1) * 16
                - 2 * (frame_crop_top_offset + frame_crop_bottom_offset));
        System.out.println(pic_width + " x " + pic_height);
    } catch (Exception e) {
        e.printStackTrace(System.out);
    }
}


You have a bug in your code: you should discard the first byte, which is the NAL unit header (it carries the NAL unit type):

67 - NAL unit type ( SPS )
42 - Profile Idc ( Baseline profile in your case )
00 - reserved zero
29 - level ( 41 )
e2 - SPS id ( 0 ), max frame num - 4 ( 0 ), POC type ( 0 ), 
90 - uev ( 5 ), num ref frames ( 1 ), u ( 0 )
etc

You can actually use JCodec class https://github.com/jcodec/jcodec/blob/master/src/main/java/org/jcodec/codecs/h264/io/model/SeqParameterSet.java as out-of-the-box SPS parsing routine.


For some profiles you must also read the chroma-format, bit-depth and scaling-matrix fields that follow seq_parameter_set_id:

        int seq_parameter_set_id = uev();

        // These profiles carry an extra block (chroma format, bit depths, optional
        // scaling matrices) between seq_parameter_set_id and log2_max_frame_num_minus4.
        // 138, 139, 134 and 135 were added to the list by later revisions of the spec.
        if (profile_idc == 100 || profile_idc == 110 ||
                profile_idc == 122 || profile_idc == 244 || profile_idc == 44 ||
                profile_idc == 83 || profile_idc == 86 || profile_idc == 118 ||
                profile_idc == 128 || profile_idc == 138 || profile_idc == 139 ||
                profile_idc == 134 || profile_idc == 135) {
            int chroma_format_idc = uev();
            if (chroma_format_idc == 3) {
                int separate_colour_plane_flag = getU(1);
            }

            int bit_depth_luma_minus8 = uev();
            int bit_depth_chroma_minus8 = uev();
            int qpprime_y_zero_transform_bypass_flag = getU(1);
            int seq_scaling_matrix_present_flag = getU(1);
            if (seq_scaling_matrix_present_flag == 1) {
                // 6 lists of 16 entries, then 2 lists of 64 entries (6 when chroma_format_idc is 3).
                int count = (chroma_format_idc != 3) ? 8 : 12;
                for (int i = 0; i < count; i++) {
                    int seq_scaling_list_present_flag_i_ = getU(1);
                    if (seq_scaling_list_present_flag_i_ == 1) {
                        if (i < 6) {
                            scaling_list(16);
                        } else {
                            scaling_list(64);
                        }
                    }
                }
            }
        }


        int log2_max_frame_num_minus4 = uev();

where scaling_list is defined as follows:

/**
 * Reads one scaling list from the shared bit reader, consuming a signed
 * Exp-Golomb delta per entry until the running scale becomes 0.
 *
 * <p>Declared static so that it can be called from the static parsing code.
 *
 * @param sizeOfScalingList number of entries in the list (the caller passes 16 or 64)
 * @return the decoded entries; once the running scale reaches 0 the remaining
 *         entries repeat the last non-zero scale
 */
static int[] scaling_list(int sizeOfScalingList) {
    int lastScale = 8;
    int nextScale = 8;
    int[] scalingList = new int[sizeOfScalingList];
    for (int j = 0; j < sizeOfScalingList; j++) {
        if (nextScale != 0) {
            int delta_scale = sev();
            nextScale = (lastScale + delta_scale + 256) % 256;
            // j == 0 && nextScale == 0 is the "use default scaling matrix" signal;
            // this method does not substitute the default values in that case.
        }
        scalingList[j] = (nextScale == 0) ? lastScale : nextScale;
        lastScale = scalingList[j];
    }
    return scalingList;
}
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜