Simplify emulation byte filtering
The original algorithm was incorrectly including the nal_unit_type in the returned RBSP when there were no emulation bytes to filter. This version is slower because it examines every byte individually but it's simpler to understand. Add a test case for H264 stream parsing that demonstrates the issue.
Showing
3 changed files
with
122 additions
and
81 deletions
... | @@ -8,7 +8,6 @@ | ... | @@ -8,7 +8,6 @@ |
8 | 8 | ||
9 | (function(window) { | 9 | (function(window) { |
10 | var | 10 | var |
11 | |||
12 | ExpGolomb = window.videojs.hls.ExpGolomb, | 11 | ExpGolomb = window.videojs.hls.ExpGolomb, |
13 | FlvTag = window.videojs.hls.FlvTag, | 12 | FlvTag = window.videojs.hls.FlvTag, |
14 | 13 | ||
... | @@ -16,22 +15,8 @@ | ... | @@ -16,22 +15,8 @@ |
16 | this.sps = []; // :Array | 15 | this.sps = []; // :Array |
17 | this.pps = []; // :Array | 16 | this.pps = []; // :Array |
18 | 17 | ||
19 | this.addSPS = function(size) { // :ByteArray | ||
20 | console.assert(size > 0); | ||
21 | var tmp = new Uint8Array(size); // :ByteArray | ||
22 | this.sps.push(tmp); | ||
23 | return tmp; | ||
24 | }; | ||
25 | |||
26 | this.addPPS = function(size) { // :ByteArray | ||
27 | console.assert(size); | ||
28 | var tmp = new Uint8Array(size); // :ByteArray | ||
29 | this.pps.push(tmp); | ||
30 | return tmp; | ||
31 | }; | ||
32 | |||
33 | this.extraDataExists = function() { // :Boolean | 18 | this.extraDataExists = function() { // :Boolean |
34 | return 0 < this.sps.length; | 19 | return this.sps.length > 0; |
35 | }; | 20 | }; |
36 | 21 | ||
37 | // (sizeOfScalingList:int, expGolomb:ExpGolomb):void | 22 | // (sizeOfScalingList:int, expGolomb:ExpGolomb):void |
... | @@ -56,61 +41,37 @@ | ... | @@ -56,61 +41,37 @@ |
56 | }; | 41 | }; |
57 | 42 | ||
58 | /** | 43 | /** |
59 | * NAL unit | ||
60 | * |- NAL header -|------ RBSP ------| | ||
61 | * | ||
62 | * NAL unit: Network abstraction layer unit. The combination of a NAL | ||
63 | * header and an RBSP. | ||
64 | * NAL header: the encapsulation unit for transport-specific metadata in | ||
65 | * an h264 stream. Exactly one byte. | ||
66 | * RBSP: raw bit-stream payload. The actual encoded video data. | 44 | * RBSP: raw bit-stream payload. The actual encoded video data. |
67 | * | 45 | * |
68 | * SPS: sequence parameter set. Part of the RBSP. Metadata to be applied | 46 | * SPS: sequence parameter set. Part of the RBSP. Metadata to be applied |
69 | * to a complete video sequence, like width and height. | 47 | * to a complete video sequence, like width and height. |
70 | */ | 48 | */ |
71 | this.getSps0Rbsp = function() { // :ByteArray | 49 | this.getSps0Rbsp = function() { // :ByteArray |
72 | // remove emulation bytes. Is this nesessary? is there ever emulation | ||
73 | // bytes in the SPS? | ||
74 | var | 50 | var |
75 | spsCount = 0, | 51 | sps = this.sps[0], |
76 | sps0 = this.sps[0], // :ByteArray | 52 | offset = 1, |
77 | rbspCount = 0, | 53 | start = 1, |
78 | start = 1, // :uint | 54 | written = 0, |
79 | end = sps0.byteLength - 2, // :uint | 55 | end = sps.byteLength - 2, |
80 | rbsp = new Uint8Array(sps0.byteLength), // :ByteArray | 56 | result = new Uint8Array(sps.byteLength); |
81 | offset = 0; // :uint | 57 | |
82 | 58 | // In order to prevent 0x0000 01 from being interpreted as a | |
83 | // H264 requires emulation bytes (0x03) be dropped to interpret NAL | 59 | // NAL start code, occurrences of that byte sequence in the |
84 | // units. For instance, 0x8a03b4 should be read as 0x8ab4. | 60 | // RBSP are escaped with an "emulation byte". That turns |
85 | for (offset = start ; offset < end ;) { | 61 | // sequences of 0x0000 01 into 0x0000 0301. When interpreting |
86 | if (3 !== sps0[offset + 2]) { | 62 | // a NAL payload, they must be filtered back out. |
87 | offset += 3; | 63 | while (offset < end) { |
88 | } else if (0 !== sps0[offset + 1]) { | 64 | if (sps[offset] === 0x00 && |
89 | offset += 2; | 65 | sps[offset + 1] === 0x00 && |
90 | } else if (0 !== sps0[offset + 0]) { | 66 | sps[offset + 2] === 0x03) { |
91 | offset += 1; | 67 | result.set(sps.subarray(start, offset + 1), written); |
92 | } else { | 68 | written += offset + 1 - start; |
93 | rbsp.set([0x00, 0x00], rbspCount); | 69 | start = offset + 3; |
94 | spsCount += 2; | ||
95 | rbspCount += 2; | ||
96 | |||
97 | if (offset > start) { | ||
98 | // If there are bytes to write, write them | ||
99 | rbsp.set(sps0.subarray(start, offset - start), rbspCount); | ||
100 | spsCount += offset - start; | ||
101 | rbspCount += offset - start; | ||
102 | } | ||
103 | |||
104 | // skip the emulation bytes | ||
105 | offset += 3; | ||
106 | start = offset; | ||
107 | } | 70 | } |
71 | offset++; | ||
108 | } | 72 | } |
109 | 73 | result.set(sps.subarray(start), written); | |
110 | // copy any remaining bytes | 74 | return result.subarray(0, written + (sps.byteLength - start)); |
111 | rbsp.set(sps0.subarray(spsCount), rbspCount); // sps0.readBytes(rbsp, rbsp.length); | ||
112 | |||
113 | return rbsp; | ||
114 | }; | 75 | }; |
115 | 76 | ||
116 | // (pts:uint):FlvTag | 77 | // (pts:uint):FlvTag |
... | @@ -257,21 +218,36 @@ | ... | @@ -257,21 +218,36 @@ |
257 | }; | 218 | }; |
258 | }, | 219 | }, |
259 | 220 | ||
260 | // incomplete, see Table 7.1 of ITU-T H.264 for 12-32 | 221 | NALUnitType; |
261 | NALUnitType = { | 222 | |
262 | unspecified: 0, | 223 | /** |
263 | slice_layer_without_partitioning_rbsp_non_idr: 1, | 224 | * Network Abstraction Layer (NAL) units are the packets of an H264 |
264 | slice_data_partition_a_layer_rbsp: 2, | 225 | * stream. NAL units are divided into types based on their payload |
265 | slice_data_partition_b_layer_rbsp: 3, | 226 | * data. Each type has a unique numeric identifier. |
266 | slice_data_partition_c_layer_rbsp: 4, | 227 | * |
267 | slice_layer_without_partitioning_rbsp_idr: 5, | 228 | * NAL unit |
268 | sei_rbsp: 6, | 229 | * |- NAL header -|------ RBSP ------| |
269 | seq_parameter_set_rbsp: 7, | 230 | * |
270 | pic_parameter_set_rbsp: 8, | 231 | * NAL unit: Network abstraction layer unit. The combination of a NAL |
271 | access_unit_delimiter_rbsp: 9, | 232 | * header and an RBSP. |
272 | end_of_seq_rbsp: 10, | 233 | * NAL header: the encapsulation unit for transport-specific metadata in |
273 | end_of_stream_rbsp: 11 | 234 | * an h264 stream. Exactly one byte. |
274 | }; | 235 | */ |
236 | // incomplete, see Table 7.1 of ITU-T H.264 for 12-32 | ||
237 | window.videojs.hls.NALUnitType = NALUnitType = { | ||
238 | unspecified: 0, | ||
239 | slice_layer_without_partitioning_rbsp_non_idr: 1, | ||
240 | slice_data_partition_a_layer_rbsp: 2, | ||
241 | slice_data_partition_b_layer_rbsp: 3, | ||
242 | slice_data_partition_c_layer_rbsp: 4, | ||
243 | slice_layer_without_partitioning_rbsp_idr: 5, | ||
244 | sei_rbsp: 6, | ||
245 | seq_parameter_set_rbsp: 7, | ||
246 | pic_parameter_set_rbsp: 8, | ||
247 | access_unit_delimiter_rbsp: 9, | ||
248 | end_of_seq_rbsp: 10, | ||
249 | end_of_stream_rbsp: 11 | ||
250 | }; | ||
275 | 251 | ||
276 | window.videojs.hls.H264Stream = function() { | 252 | window.videojs.hls.H264Stream = function() { |
277 | var | 253 | var |
... | @@ -395,7 +371,7 @@ | ... | @@ -395,7 +371,7 @@ |
395 | data[offset + 1] === 0 && | 371 | data[offset + 1] === 0 && |
396 | data[offset + 2] === 1) { | 372 | data[offset + 2] === 1) { |
397 | // 00 : 00 00 01 | 373 | // 00 : 00 00 01 |
398 | h264Frame.length -= 1; | 374 | // h264Frame.length -= 1; |
399 | state = 3; | 375 | state = 3; |
400 | return this.writeBytes(data, offset + 3, length - 3); | 376 | return this.writeBytes(data, offset + 3, length - 3); |
401 | } | 377 | } |
... | @@ -466,7 +442,6 @@ | ... | @@ -466,7 +442,6 @@ |
466 | h264Frame.endNalUnit(newExtraData.pps); | 442 | h264Frame.endNalUnit(newExtraData.pps); |
467 | break; | 443 | break; |
468 | case NALUnitType.slice_layer_without_partitioning_rbsp_idr: | 444 | case NALUnitType.slice_layer_without_partitioning_rbsp_idr: |
469 | h264Frame.keyFrame = true; | ||
470 | h264Frame.endNalUnit(); | 445 | h264Frame.endNalUnit(); |
471 | break; | 446 | break; |
472 | default: | 447 | default: |
... | @@ -477,8 +452,13 @@ | ... | @@ -477,8 +452,13 @@ |
477 | 452 | ||
478 | // setup to begin processing the new NAL unit | 453 | // setup to begin processing the new NAL unit |
479 | nalUnitType = data[offset] & 0x1F; | 454 | nalUnitType = data[offset] & 0x1F; |
480 | if (h264Frame && 9 === nalUnitType) { | 455 | if (h264Frame) { |
481 | this.finishFrame(); // We are starting a new access unit. Flush the previous one | 456 | if (nalUnitType === NALUnitType.access_unit_delimiter_rbsp) { |
457 | // starting a new access unit, flush the previous one | ||
458 | this.finishFrame(); | ||
459 | } else if (nalUnitType === NALUnitType.slice_layer_without_partitioning_rbsp_idr) { | ||
460 | h264Frame.keyFrame = true; | ||
461 | } | ||
482 | } | 462 | } |
483 | 463 | ||
484 | // finishFrame may render h264Frame null, so we must test again | 464 | // finishFrame may render h264Frame null, so we must test again | ... | ... |
test/h264-stream_test.js
0 → 100644
1 | module('H264 Stream'); | ||
2 | |||
3 | var | ||
4 | nalUnitTypes = window.videojs.hls.NALUnitType, | ||
5 | FlvTag = window.videojs.hls.FlvTag; | ||
6 | |||
7 | test('metadata is generated for IDRs after a full NAL unit is written', function() { | ||
8 | var | ||
9 | h264Stream = new videojs.hls.H264Stream(), | ||
10 | accessUnitDelimiter = new Uint8Array([ | ||
11 | 0x00, | ||
12 | 0x00, | ||
13 | 0x01, | ||
14 | nalUnitTypes.access_unit_delimiter_rbsp | ||
15 | ]), | ||
16 | seqParamSet = new Uint8Array([ | ||
17 | 0x00, | ||
18 | 0x00, | ||
19 | 0x01, | ||
20 | 0x60 | nalUnitTypes.seq_parameter_set_rbsp, | ||
21 | 0x00, // profile_idc | ||
22 | 0x00, // constraint_set flags | ||
23 | 0x00, // level_idc | ||
24 | // seq_parameter_set_id ue(v) 0 => 1 | ||
25 | // log2_max_frame_num_minus4 ue(v) 1 => 010 | ||
26 | // pic_order_cnt_type ue(v) 0 => 1 | ||
27 | // log2_max_pic_order_cnt_lsb_minus4 ue(v) 1 => 010 | ||
28 | // max_num_ref_frames ue(v) 1 => 010 | ||
29 | // gaps_in_frame_num_value_allowed u(1) 0 | ||
30 | // pic_width_in_mbs_minus1 ue(v) 0 => 1 | ||
31 | // pic_height_in_map_units_minus1 ue(v) 0 => 1 | ||
32 | // frame_mbs_only_flag u(1) 1 | ||
33 | // direct_8x8_inference_flag u(1) 0 | ||
34 | // frame_cropping_flag u(1) 0 | ||
35 | // vui_parameters_present_flag u(1) 0 | ||
36 | // 1010 1010 0100 1110 00(00 0000) | ||
37 | 0xAA, | ||
38 | 0x4E, | ||
39 | 0x00 | ||
40 | ]), | ||
41 | idr = new Uint8Array([ | ||
42 | 0x00, | ||
43 | 0x00, | ||
44 | 0x01, | ||
45 | nalUnitTypes.slice_layer_without_partitioning_rbsp_idr | ||
46 | ]);; | ||
47 | |||
48 | h264Stream.setNextTimeStamp(0, 0, true); | ||
49 | h264Stream.writeBytes(accessUnitDelimiter, 0, accessUnitDelimiter.byteLength); | ||
50 | h264Stream.writeBytes(seqParamSet, 0, seqParamSet.byteLength); | ||
51 | h264Stream.writeBytes(idr, 0, idr.byteLength); | ||
52 | h264Stream.setNextTimeStamp(1, 1, true); | ||
53 | |||
54 | strictEqual(h264Stream.tags.length, 3, 'three tags are written'); | ||
55 | ok(FlvTag.isMetaData(h264Stream.tags[0].bytes), | ||
56 | 'metadata is written'); | ||
57 | ok(FlvTag.isVideoFrame(h264Stream.tags[1].bytes), | ||
58 | 'picture parameter set is written'); | ||
59 | ok(h264Stream.tags[2].keyFrame, 'key frame is written'); | ||
60 | }); |
... | @@ -44,6 +44,7 @@ | ... | @@ -44,6 +44,7 @@ |
44 | </script> | 44 | </script> |
45 | <script src="videojs-hls_test.js"></script> | 45 | <script src="videojs-hls_test.js"></script> |
46 | <script src="segment-parser.js"></script> | 46 | <script src="segment-parser.js"></script> |
47 | <script src="h264-stream_test.js"></script> | ||
47 | <script src="exp-golomb_test.js"></script> | 48 | <script src="exp-golomb_test.js"></script> |
48 | <script src="flv-tag_test.js"></script> | 49 | <script src="flv-tag_test.js"></script> |
49 | <script src="m3u8_test.js"></script> | 50 | <script src="m3u8_test.js"></script> | ... | ... |
-
Please register or sign in to post a comment