0746445f by David LaPalomento

Simplify emulation byte filtering

The original algorithm was incorrectly including the nal_unit_type in the returned RBSP when there were no emulation bytes to filter. This version is slower because it examines every byte individually but it's simpler to understand. Add a test case for H264 stream parsing that demonstrates the issue.
1 parent 56e2bde3
......@@ -8,7 +8,6 @@
(function(window) {
var
ExpGolomb = window.videojs.hls.ExpGolomb,
FlvTag = window.videojs.hls.FlvTag,
......@@ -16,22 +15,8 @@
this.sps = []; // :Array
this.pps = []; // :Array
/**
 * Allocate a buffer for a sequence parameter set (SPS) and append it
 * to `this.sps`.
 * @param size {number} the length of the buffer in bytes
 * @return {Uint8Array} the newly allocated buffer, which is also the
 * last element of `this.sps`
 */
this.addSPS = function(size) { // :ByteArray
  var buffer;
  console.assert(size > 0);
  buffer = new Uint8Array(size);
  this.sps.push(buffer);
  return buffer;
};
/**
 * Allocate a buffer for a picture parameter set (PPS) and append it
 * to `this.pps`.
 * @param size {number} the length of the buffer in bytes; expected to
 * be greater than zero
 * @return {Uint8Array} the newly allocated buffer, which is also the
 * last element of `this.pps`
 */
this.addPPS = function(size) { // :ByteArray
  // use the same explicit check as addSPS so that negative sizes are
  // reported by the assertion instead of slipping through as truthy
  console.assert(size > 0);
  var tmp = new Uint8Array(size); // :ByteArray
  this.pps.push(tmp);
  return tmp;
};
/**
 * Report whether at least one sequence parameter set has been stored.
 * @return {boolean} true if `this.sps` is not empty
 */
this.extraDataExists = function() { // :Boolean
  return this.sps.length > 0;
};
// (sizeOfScalingList:int, expGolomb:ExpGolomb):void
......@@ -56,61 +41,37 @@
};
/**
* NAL unit
* |- NAL header -|------ RBSP ------|
*
* NAL unit: Network abstraction layer unit. The combination of a NAL
* header and an RBSP.
* NAL header: the encapsulation unit for transport-specific metadata in
* an h264 stream. Exactly one byte.
* RBSP: raw bit-stream payload. The actual encoded video data.
*
* SPS: sequence parameter set. Part of the RBSP. Metadata to be applied
* to a complete video sequence, like width and height.
*/
/**
 * Build the RBSP of the first stored sequence parameter set: the
 * bytes of `this.sps[0]` without the one byte NAL header and with
 * emulation bytes filtered out.
 * @return {Uint8Array} a view containing only the filtered payload
 * bytes. `this.sps[0]` itself is not modified.
 */
this.getSps0Rbsp = function() { // :ByteArray
  var
    sps = this.sps[0],
    // start at one to leave the NAL header out of the result
    offset = 1,
    start = 1,
    written = 0,
    // three bytes are examined at a time, so stop two bytes early
    end = sps.byteLength - 2,
    result = new Uint8Array(sps.byteLength);

  // In order to prevent 0x0000 01 from being interpreted as a
  // NAL start code, occurrences of that byte sequence in the
  // RBSP are escaped with an "emulation byte". That turns
  // sequences of 0x0000 01 into 0x0000 0301. When interpreting
  // a NAL payload, they must be filtered back out.
  while (offset < end) {
    if (sps[offset] === 0x00 &&
        sps[offset + 1] === 0x00 &&
        sps[offset + 2] === 0x03) {
      // keep everything up to and including both zero bytes and
      // drop only the 0x03 that follows them
      result.set(sps.subarray(start, offset + 2), written);
      written += offset + 2 - start;
      start = offset + 3;
      // the bytes just handled cannot begin another escape sequence
      offset = start;
    } else {
      offset++;
    }
  }

  // copy any remaining bytes
  result.set(sps.subarray(start), written);
  return result.subarray(0, written + (sps.byteLength - start));
};
// (pts:uint):FlvTag
......@@ -257,21 +218,36 @@
};
},
// declared with the other module-level variables; the value is
// assigned below so the same object can also be exported
NALUnitType;

/**
 * Network Abstraction Layer (NAL) units are the packets of an H264
 * stream. NAL units are divided into types based on their payload
 * data. Each type has a unique numeric identifier.
 *
 *              NAL unit
 * |- NAL header -|------ RBSP ------|
 *
 * NAL unit: Network abstraction layer unit. The combination of a NAL
 * header and an RBSP.
 * NAL header: the encapsulation unit for transport-specific metadata in
 * an h264 stream. Exactly one byte.
 */
// incomplete, see Table 7.1 of ITU-T H.264 for 12-32
window.videojs.hls.NALUnitType = NALUnitType = {
  unspecified: 0,
  slice_layer_without_partitioning_rbsp_non_idr: 1,
  slice_data_partition_a_layer_rbsp: 2,
  slice_data_partition_b_layer_rbsp: 3,
  slice_data_partition_c_layer_rbsp: 4,
  slice_layer_without_partitioning_rbsp_idr: 5,
  sei_rbsp: 6,
  seq_parameter_set_rbsp: 7,
  pic_parameter_set_rbsp: 8,
  access_unit_delimiter_rbsp: 9,
  end_of_seq_rbsp: 10,
  end_of_stream_rbsp: 11
};
window.videojs.hls.H264Stream = function() {
var
......@@ -395,7 +371,7 @@
data[offset + 1] === 0 &&
data[offset + 2] === 1) {
// 00 : 00 00 01
h264Frame.length -= 1;
// h264Frame.length -= 1;
state = 3;
return this.writeBytes(data, offset + 3, length - 3);
}
......@@ -466,7 +442,6 @@
h264Frame.endNalUnit(newExtraData.pps);
break;
case NALUnitType.slice_layer_without_partitioning_rbsp_idr:
h264Frame.keyFrame = true;
h264Frame.endNalUnit();
break;
default:
......@@ -477,8 +452,13 @@
// setup to begin processing the new NAL unit
nalUnitType = data[offset] & 0x1F;
if (h264Frame && 9 === nalUnitType) {
this.finishFrame(); // We are starting a new access unit. Flush the previous one
if (h264Frame) {
if (nalUnitType === NALUnitType.access_unit_delimiter_rbsp) {
// starting a new access unit, flush the previous one
this.finishFrame();
} else if (nalUnitType === NALUnitType.slice_layer_without_partitioning_rbsp_idr) {
h264Frame.keyFrame = true;
}
}
// finishFrame may render h264Frame null, so we must test again
......
// register the module name that the tests in this file are grouped under
module('H264 Stream');

// local aliases for the library objects the tests refer to
var nalUnitTypes = window.videojs.hls.NALUnitType;
var FlvTag = window.videojs.hls.FlvTag;
// Feeds an access unit delimiter, a sequence parameter set and an IDR
// slice into a fresh H264Stream, then checks the three tags it produced.
test('metadata is generated for IDRs after a full NAL unit is written', function() {
  var stream = new videojs.hls.H264Stream();

  // each NAL unit below is preceded by the bytes 0x00 0x00 0x01
  var delimiterBytes = new Uint8Array([
    0x00, 0x00, 0x01,
    nalUnitTypes.access_unit_delimiter_rbsp
  ]);

  var spsBytes = new Uint8Array([
    0x00, 0x00, 0x01,
    0x60 | nalUnitTypes.seq_parameter_set_rbsp,
    0x00, // profile_idc
    0x00, // constraint_set flags
    0x00, // level_idc
    // The remaining fields, in order, with their encoded bits:
    //   seq_parameter_set_id              ue(v) 0 => 1
    //   log2_max_frame_num_minus4         ue(v) 1 => 010
    //   pic_order_cnt_type                ue(v) 0 => 1
    //   log2_max_pic_order_cnt_lsb_minus4 ue(v) 1 => 010
    //   max_num_ref_frames                ue(v) 1 => 010
    //   gaps_in_frame_num_value_allowed   u(1)  0
    //   pic_width_in_mbs_minus1           ue(v) 0 => 1
    //   pic_height_in_map_units_minus1    ue(v) 0 => 1
    //   frame_mbs_only_flag               u(1)  1
    //   direct_8x8_inference_flag         u(1)  0
    //   frame_cropping_flag               u(1)  0
    //   vui_parameters_present_flag       u(1)  0
    // which packs to 1010 1010 0100 1110 00(00 0000)
    0xAA, 0x4E, 0x00
  ]);

  var idrBytes = new Uint8Array([
    0x00, 0x00, 0x01,
    nalUnitTypes.slice_layer_without_partitioning_rbsp_idr
  ]);

  stream.setNextTimeStamp(0, 0, true);
  stream.writeBytes(delimiterBytes, 0, delimiterBytes.byteLength);
  stream.writeBytes(spsBytes, 0, spsBytes.byteLength);
  stream.writeBytes(idrBytes, 0, idrBytes.byteLength);
  stream.setNextTimeStamp(1, 1, true);

  strictEqual(stream.tags.length, 3, 'three tags are written');
  ok(FlvTag.isMetaData(stream.tags[0].bytes), 'metadata is written');
  ok(FlvTag.isVideoFrame(stream.tags[1].bytes), 'picture parameter set is written');
  ok(stream.tags[2].keyFrame, 'key frame is written');
});
......@@ -44,6 +44,7 @@
</script>
<script src="videojs-hls_test.js"></script>
<script src="segment-parser.js"></script>
<script src="h264-stream_test.js"></script>
<script src="exp-golomb_test.js"></script>
<script src="flv-tag_test.js"></script>
<script src="m3u8_test.js"></script>
......