Generate a valid audio initialization segment
Modify the mp4 generator to inspect audio tracks and generate a working initialization segment. Hook the audio init segment up to the mp4 transmuxing test page.
Showing 9 changed files with 585 additions and 131 deletions
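For orientation, here is a minimal sketch of how the typed transmuxer output introduced by this change is consumed; it mirrors the test-page wiring further down in this diff, and `tsBytes` is a placeholder for a Uint8Array of MPEG-TS data rather than anything defined by the change:

// Sketch: collect transmuxed segments by type, then append them to
// separate audio and video SourceBuffers one update at a time.
var video = document.querySelector('video'),
    mediaSource = new MediaSource(),
    transmuxer = new videojs.mp2t.Transmuxer(),
    videoSegments = [],
    audioSegments = [];

video.src = window.URL.createObjectURL(mediaSource);

mediaSource.addEventListener('sourceopen', function() {
  var videoBuffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
      audioBuffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2');

  // every 'data' event now carries a `type` so audio and video can be
  // routed to the matching buffer
  transmuxer.on('data', function(segment) {
    if (segment.type === 'audio') {
      audioSegments.push(segment);
    } else {
      videoSegments.push(segment);
    }
  });
  transmuxer.push(tsBytes); // placeholder MPEG-TS bytes
  transmuxer.end();

  // append the queued segments, waiting for each update to finish
  videoBuffer.appendBuffer(videoSegments.shift().data);
  videoBuffer.addEventListener('updateend', function() {
    if (videoSegments.length) {
      videoBuffer.appendBuffer(videoSegments.shift().data);
    }
  });
  audioBuffer.appendBuffer(audioSegments.shift().data);
  audioBuffer.addEventListener('updateend', function() {
    if (audioSegments.length) {
      audioBuffer.appendBuffer(audioSegments.shift().data);
    }
  });
});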
... | @@ -4,7 +4,7 @@ | ... | @@ -4,7 +4,7 @@ |
4 | var box, dinf, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, trak, | 4 | var box, dinf, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, trak, |
5 | tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, styp, traf, trex, trun, | 5 | tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, styp, traf, trex, trun, |
6 | types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR, | 6 | types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR, |
7 | AUDIO_HDLR, HDLR_TYPES, VMHD, DREF, STCO, STSC, STSZ, STTS, | 7 | AUDIO_HDLR, HDLR_TYPES, ESDS, VMHD, SMHD, DREF, STCO, STSC, STSZ, STTS, |
8 | Uint8Array, DataView; | 8 | Uint8Array, DataView; |
9 | 9 | ||
10 | Uint8Array = window.Uint8Array; | 10 | Uint8Array = window.Uint8Array; |
... | @@ -19,6 +19,7 @@ DataView = window.DataView; | ... | @@ -19,6 +19,7 @@ DataView = window.DataView; |
19 | btrt: [], | 19 | btrt: [], |
20 | dinf: [], | 20 | dinf: [], |
21 | dref: [], | 21 | dref: [], |
22 | esds: [], | ||
22 | ftyp: [], | 23 | ftyp: [], |
23 | hdlr: [], | 24 | hdlr: [], |
24 | mdat: [], | 25 | mdat: [], |
... | @@ -28,9 +29,11 @@ DataView = window.DataView; | ... | @@ -28,9 +29,11 @@ DataView = window.DataView; |
28 | minf: [], | 29 | minf: [], |
29 | moof: [], | 30 | moof: [], |
30 | moov: [], | 31 | moov: [], |
32 | mp4a: [], // codingname | ||
31 | mvex: [], | 33 | mvex: [], |
32 | mvhd: [], | 34 | mvhd: [], |
33 | sdtp: [], | 35 | sdtp: [], |
36 | smhd: [], | ||
34 | stbl: [], | 37 | stbl: [], |
35 | stco: [], | 38 | stco: [], |
36 | stsc: [], | 39 | stsc: [], |
... | @@ -109,6 +112,39 @@ DataView = window.DataView; | ... | @@ -109,6 +112,39 @@ DataView = window.DataView; |
109 | 0x00, // version 0 | 112 | 0x00, // version 0 |
110 | 0x00, 0x00, 0x01 // entry_flags | 113 | 0x00, 0x00, 0x01 // entry_flags |
111 | ]); | 114 | ]); |
115 | ESDS = new Uint8Array([ | ||
116 | 0x00, // version | ||
117 | 0x00, 0x00, 0x00, // flags | ||
118 | |||
119 | // ES_Descriptor | ||
120 | 0x03, // tag, ES_DescrTag | ||
121 | 0x19, // length | ||
122 | 0x00, 0x00, // ES_ID | ||
123 | 0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority | ||
124 | |||
125 | // DecoderConfigDescriptor | ||
126 | 0x04, // tag, DecoderConfigDescrTag | ||
127 | 0x11, // length | ||
128 | 0x40, // object type | ||
129 | 0x15, // streamType | ||
130 | 0x00, 0x06, 0x00, // bufferSizeDB | ||
131 | 0x00, 0x00, 0xda, 0xc0, // maxBitrate | ||
132 | 0x00, 0x00, 0xda, 0xc0, // avgBitrate | ||
133 | |||
134 | // DecoderSpecificInfo | ||
135 | 0x05, // tag, DecoderSpecificInfoTag | ||
136 | 0x02, // length | ||
137 | // ISO/IEC 14496-3, AudioSpecificConfig | ||
138 | 0x11, // AudioObjectType, AAC LC. | ||
139 | 0x90, // samplingFrequencyIndex, 3 -> 48000. channelConfig, 2 -> stereo. | ||
140 | 0x06, 0x01, 0x02 // GASpecificConfig | ||
141 | ]); | ||
142 | SMHD = new Uint8Array([ | ||
143 | 0x00, // version | ||
144 | 0x00, 0x00, 0x00, // flags | ||
145 | 0x00, 0x00, // balance, 0 means centered | ||
146 | 0x00, 0x00 // reserved | ||
147 | ]); | ||
112 | STCO = new Uint8Array([ | 148 | STCO = new Uint8Array([ |
113 | 0x00, // version | 149 | 0x00, // version |
114 | 0x00, 0x00, 0x00, // flags | 150 | 0x00, 0x00, 0x00, // flags |
... | @@ -171,24 +207,35 @@ hdlr = function(type) { | ... | @@ -171,24 +207,35 @@ hdlr = function(type) { |
171 | mdat = function(data) { | 207 | mdat = function(data) { |
172 | return box(types.mdat, data); | 208 | return box(types.mdat, data); |
173 | }; | 209 | }; |
174 | mdhd = function(duration) { | 210 | mdhd = function(track) { |
175 | return box(types.mdhd, new Uint8Array([ | 211 | var result = new Uint8Array([ |
176 | 0x00, // version 0 | 212 | 0x00, // version 0 |
177 | 0x00, 0x00, 0x00, // flags | 213 | 0x00, 0x00, 0x00, // flags |
178 | 0x00, 0x00, 0x00, 0x02, // creation_time | 214 | 0x00, 0x00, 0x00, 0x02, // creation_time |
179 | 0x00, 0x00, 0x00, 0x03, // modification_time | 215 | 0x00, 0x00, 0x00, 0x03, // modification_time |
180 | 0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second | 216 | 0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second |
181 | 217 | ||
182 | (duration & 0xFF000000) >> 24, | 218 | (track.duration >>> 24), |
183 | (duration & 0xFF0000) >> 16, | 219 | (track.duration >>> 16) & 0xFF, |
184 | (duration & 0xFF00) >> 8, | 220 | (track.duration >>> 8) & 0xFF, |
185 | duration & 0xFF, // duration | 221 | track.duration & 0xFF, // duration |
186 | 0x55, 0xc4, // 'und' language (undetermined) | 222 | 0x55, 0xc4, // 'und' language (undetermined) |
187 | 0x00, 0x00 | 223 | 0x00, 0x00 |
188 | ])); | 224 | ]); |
225 | |||
226 | // Use the sample rate from the track metadata, when it is | ||
227 | // defined. The sample rate can be parsed out of an ADTS header, for | ||
228 | // instance. | ||
229 | if (track.samplerate) { | ||
230 | result[12] = (track.samplerate >>> 24); | ||
231 | result[13] = (track.samplerate >>> 16) & 0xFF; | ||
232 | result[14] = (track.samplerate >>> 8) & 0xFF; | ||
233 | result[15] = (track.samplerate) & 0xFF; | ||
234 | } | ||
235 | return box(types.mdhd, result); | ||
189 | }; | 236 | }; |
190 | mdia = function(track) { | 237 | mdia = function(track) { |
191 | return box(types.mdia, mdhd(track.duration), hdlr(track.type), minf(track)); | 238 | return box(types.mdia, mdhd(track), hdlr(track.type), minf(track)); |
192 | }; | 239 | }; |
193 | mfhd = function(sequenceNumber) { | 240 | mfhd = function(sequenceNumber) { |
194 | return box(types.mfhd, new Uint8Array([ | 241 | return box(types.mfhd, new Uint8Array([ |
... | @@ -201,7 +248,10 @@ mfhd = function(sequenceNumber) { | ... | @@ -201,7 +248,10 @@ mfhd = function(sequenceNumber) { |
201 | ])); | 248 | ])); |
202 | }; | 249 | }; |
203 | minf = function(track) { | 250 | minf = function(track) { |
204 | return box(types.minf, box(types.vmhd, VMHD), dinf(), stbl(track)); | 251 | return box(types.minf, |
252 | track.type === 'video' ? box(types.vmhd, VMHD) : box(types.smhd, SMHD), | ||
253 | dinf(), | ||
254 | stbl(track)); | ||
205 | }; | 255 | }; |
206 | moof = function(sequenceNumber, tracks) { | 256 | moof = function(sequenceNumber, tracks) { |
207 | var | 257 | var |
... | @@ -217,7 +267,9 @@ moof = function(sequenceNumber, tracks) { | ... | @@ -217,7 +267,9 @@ moof = function(sequenceNumber, tracks) { |
217 | ].concat(trackFragments)); | 267 | ].concat(trackFragments)); |
218 | }; | 268 | }; |
219 | /** | 269 | /** |
220 | * @param tracks... (optional) {array} the tracks associated with this movie | 270 | * Returns a movie box. |
271 | * @param tracks {array} the tracks associated with this movie | ||
272 | * @see ISO/IEC 14496-12:2012(E), section 8.2.1 | ||
221 | */ | 273 | */ |
222 | moov = function(tracks) { | 274 | moov = function(tracks) { |
223 | var | 275 | var |
... | @@ -307,32 +359,36 @@ stbl = function(track) { | ... | @@ -307,32 +359,36 @@ stbl = function(track) { |
307 | box(types.stco, STCO)); | 359 | box(types.stco, STCO)); |
308 | }; | 360 | }; |
309 | 361 | ||
310 | stsd = function(track) { | 362 | (function() { |
311 | var sequenceParameterSets = [], pictureParameterSets = [], i; | 363 | var videoSample, audioSample; |
312 | 364 | ||
313 | if (track.type === 'audio') { | 365 | stsd = function(track) { |
314 | return box(types.stsd); | ||
315 | } | ||
316 | 366 | ||
317 | // assemble the SPSs | 367 | return box(types.stsd, new Uint8Array([ |
318 | for (i = 0; i < track.sps.length; i++) { | 368 | 0x00, // version 0 |
319 | sequenceParameterSets.push((track.sps[i].byteLength & 0xFF00) >>> 8); | 369 | 0x00, 0x00, 0x00, // flags |
320 | sequenceParameterSets.push((track.sps[i].byteLength & 0xFF)); // sequenceParameterSetLength | 370 | 0x00, 0x00, 0x00, 0x01 |
321 | sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(track.sps[i])); // SPS | 371 | ]), track.type === 'video' ? videoSample(track) : audioSample(track)); |
322 | } | 372 | }; |
323 | 373 | ||
324 | // assemble the PPSs | 374 | videoSample = function(track) { |
325 | for (i = 0; i < track.pps.length; i++) { | 375 | var sequenceParameterSets = [], pictureParameterSets = [], i; |
326 | pictureParameterSets.push((track.pps[i].byteLength & 0xFF00) >>> 8); | ||
327 | pictureParameterSets.push((track.pps[i].byteLength & 0xFF)); | ||
328 | pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i])); | ||
329 | } | ||
330 | 376 | ||
331 | return box(types.stsd, new Uint8Array([ | 377 | // assemble the SPSs |
332 | 0x00, // version 0 | 378 | for (i = 0; i < track.sps.length; i++) { |
333 | 0x00, 0x00, 0x00, // flags | 379 | sequenceParameterSets.push((track.sps[i].byteLength & 0xFF00) >>> 8); |
334 | 0x00, 0x00, 0x00, 0x01]), | 380 | sequenceParameterSets.push((track.sps[i].byteLength & 0xFF)); // sequenceParameterSetLength |
335 | box(types.avc1, new Uint8Array([ | 381 | sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(track.sps[i])); // SPS |
382 | } | ||
383 | |||
384 | // assemble the PPSs | ||
385 | for (i = 0; i < track.pps.length; i++) { | ||
386 | pictureParameterSets.push((track.pps[i].byteLength & 0xFF00) >>> 8); | ||
387 | pictureParameterSets.push((track.pps[i].byteLength & 0xFF)); | ||
388 | pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i])); | ||
389 | } | ||
390 | |||
391 | return box(types.avc1, new Uint8Array([ | ||
336 | 0x00, 0x00, 0x00, | 392 | 0x00, 0x00, 0x00, |
337 | 0x00, 0x00, 0x00, // reserved | 393 | 0x00, 0x00, 0x00, // reserved |
338 | 0x00, 0x01, // data_reference_index | 394 | 0x00, 0x01, // data_reference_index |
... | @@ -359,31 +415,60 @@ stsd = function(track) { | ... | @@ -359,31 +415,60 @@ stsd = function(track) { |
359 | 0x00, 0x00, 0x00, 0x00, | 415 | 0x00, 0x00, 0x00, 0x00, |
360 | 0x00, 0x00, 0x00, // compressorname | 416 | 0x00, 0x00, 0x00, // compressorname |
361 | 0x00, 0x18, // depth = 24 | 417 | 0x00, 0x18, // depth = 24 |
362 | 0x11, 0x11]), // pre_defined = -1 | 418 | 0x11, 0x11 // pre_defined = -1 |
363 | box(types.avcC, new Uint8Array([ | 419 | ]), box(types.avcC, new Uint8Array([ |
364 | 0x01, // configurationVersion | 420 | 0x01, // configurationVersion |
365 | track.profileIdc, // AVCProfileIndication | 421 | track.profileIdc, // AVCProfileIndication |
366 | track.profileCompatibility, // profile_compatibility | 422 | track.profileCompatibility, // profile_compatibility |
367 | track.levelIdc, // AVCLevelIndication | 423 | track.levelIdc, // AVCLevelIndication |
368 | 0xff // lengthSizeMinusOne, hard-coded to 4 bytes | 424 | 0xff // lengthSizeMinusOne, hard-coded to 4 bytes |
369 | ].concat([ | 425 | ].concat([ |
370 | track.sps.length // numOfSequenceParameterSets | 426 | track.sps.length // numOfSequenceParameterSets |
371 | ]).concat(sequenceParameterSets).concat([ | 427 | ]).concat(sequenceParameterSets).concat([ |
372 | track.pps.length // numOfPictureParameterSets | 428 | track.pps.length // numOfPictureParameterSets |
373 | ]).concat(pictureParameterSets))), // "PPS" | 429 | ]).concat(pictureParameterSets))), // "PPS" |
374 | box(types.btrt, new Uint8Array([ | 430 | box(types.btrt, new Uint8Array([ |
375 | 0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB | 431 | 0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB |
376 | 0x00, 0x2d, 0xc6, 0xc0, // maxBitrate | 432 | 0x00, 0x2d, 0xc6, 0xc0, // maxBitrate |
377 | 0x00, 0x2d, 0xc6, 0xc0])) // avgBitrate | 433 | 0x00, 0x2d, 0xc6, 0xc0 |
378 | )); | 434 | ])) // avgBitrate |
379 | }; | 435 | ); |
436 | }; | ||
437 | |||
438 | audioSample = function(track) { | ||
439 | return box(types.mp4a, new Uint8Array([ | ||
440 | |||
441 | // SampleEntry, ISO/IEC 14496-12 | ||
442 | 0x00, 0x00, 0x00, | ||
443 | 0x00, 0x00, 0x00, // reserved | ||
444 | 0x00, 0x01, // data_reference_index | ||
445 | |||
446 | // AudioSampleEntry, ISO/IEC 14496-12 | ||
447 | 0x00, 0x00, 0x00, 0x00, // reserved | ||
448 | 0x00, 0x00, 0x00, 0x00, // reserved | ||
449 | (track.channelcount & 0xff00) >> 8, | ||
450 | (track.channelcount & 0xff), // channelcount | ||
451 | |||
452 | (track.samplesize & 0xff00) >> 8, | ||
453 | (track.samplesize & 0xff), // samplesize | ||
454 | 0x00, 0x00, // pre_defined | ||
455 | 0x00, 0x00, // reserved | ||
456 | |||
457 | (track.samplerate & 0xff00) >> 8, | ||
458 | (track.samplerate & 0xff), | ||
459 | 0x00, 0x00 // samplerate, 16.16 | ||
460 | |||
461 | // MP4AudioSampleEntry, ISO/IEC 14496-14 | ||
462 | ]), box(types.esds, ESDS)); | ||
463 | }; | ||
464 | })(); | ||
380 | 465 | ||
381 | styp = function() { | 466 | styp = function() { |
382 | return box(types.styp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND); | 467 | return box(types.styp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND); |
383 | }; | 468 | }; |
384 | 469 | ||
385 | tkhd = function(track) { | 470 | tkhd = function(track) { |
386 | return box(types.tkhd, new Uint8Array([ | 471 | var result = new Uint8Array([ |
387 | 0x00, // version 0 | 472 | 0x00, // version 0 |
388 | 0x00, 0x00, 0x07, // flags | 473 | 0x00, 0x00, 0x07, // flags |
389 | 0x00, 0x00, 0x00, 0x00, // creation_time | 474 | 0x00, 0x00, 0x00, 0x00, // creation_time |
... | @@ -401,7 +486,7 @@ tkhd = function(track) { | ... | @@ -401,7 +486,7 @@ tkhd = function(track) { |
401 | 0x00, 0x00, 0x00, 0x00, // reserved | 486 | 0x00, 0x00, 0x00, 0x00, // reserved |
402 | 0x00, 0x00, // layer | 487 | 0x00, 0x00, // layer |
403 | 0x00, 0x00, // alternate_group | 488 | 0x00, 0x00, // alternate_group |
404 | 0x00, 0x00, // non-audio track volume | 489 | 0x01, 0x00, // track volume, 1.0 in fixed-point 8.8 |
405 | 0x00, 0x00, // reserved | 490 | 0x00, 0x00, // reserved |
406 | 0x00, 0x01, 0x00, 0x00, | 491 | 0x00, 0x01, 0x00, 0x00, |
407 | 0x00, 0x00, 0x00, 0x00, | 492 | 0x00, 0x00, 0x00, 0x00, |
... | @@ -418,7 +503,9 @@ tkhd = function(track) { | ... | @@ -418,7 +503,9 @@ tkhd = function(track) { |
418 | (track.height & 0xFF00) >> 8, | 503 | (track.height & 0xFF00) >> 8, |
419 | track.height & 0xFF, | 504 | track.height & 0xFF, |
420 | 0x00, 0x00 // height | 505 | 0x00, 0x00 // height |
421 | ])); | 506 | ]); |
507 | |||
508 | return box(types.tkhd, result); | ||
422 | }; | 509 | }; |
423 | 510 | ||
424 | traf = function(track) { | 511 | traf = function(track) { |
... | @@ -461,7 +548,7 @@ trak = function(track) { | ... | @@ -461,7 +548,7 @@ trak = function(track) { |
461 | }; | 548 | }; |
462 | 549 | ||
463 | trex = function(track) { | 550 | trex = function(track) { |
464 | return box(types.trex, new Uint8Array([ | 551 | var result = new Uint8Array([ |
465 | 0x00, // version 0 | 552 | 0x00, // version 0 |
466 | 0x00, 0x00, 0x00, // flags | 553 | 0x00, 0x00, 0x00, // flags |
467 | (track.id & 0xFF000000) >> 24, | 554 | (track.id & 0xFF000000) >> 24, |
... | @@ -472,7 +559,16 @@ trex = function(track) { | ... | @@ -472,7 +559,16 @@ trex = function(track) { |
472 | 0x00, 0x00, 0x00, 0x00, // default_sample_duration | 559 | 0x00, 0x00, 0x00, 0x00, // default_sample_duration |
473 | 0x00, 0x00, 0x00, 0x00, // default_sample_size | 560 | 0x00, 0x00, 0x00, 0x00, // default_sample_size |
474 | 0x00, 0x01, 0x00, 0x01 // default_sample_flags | 561 | 0x00, 0x01, 0x00, 0x01 // default_sample_flags |
475 | ])); | 562 | ]); |
563 | // the last two bytes of default_sample_flags are the sample | ||
564 | // degradation priority, a hint about the importance of this sample | ||
565 | // relative to others. Lower the degradation priority for all sample | ||
566 | // types other than video. | ||
567 | if (track.type !== 'video') { | ||
568 | result[result.length - 1] = 0x00; | ||
569 | } | ||
570 | |||
571 | return box(types.trex, result); | ||
476 | }; | 572 | }; |
477 | 573 | ||
478 | trun = function(track, offset) { | 574 | trun = function(track, offset) { | ... | ... |
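With the generator changes above, an audio-only initialization segment can be built from a track object carrying the fields the new boxes read. A small sketch; the field values are illustrative and not taken from the change:

// `type: 'audio'` selects smhd and an mp4a sample entry instead of
// vmhd and avc1; `samplerate` also overrides the mdhd timescale when
// present; channelcount/samplesize/samplerate fill the AudioSampleEntry.
var audioTrack = {
  id: 257,           // track_ID written by trex (illustrative value)
  type: 'audio',
  duration: 0,
  channelcount: 2,
  samplerate: 44100,
  samplesize: 16
};

// an ftyp followed by a moov, returned as a Uint8Array
var audioInitSegment = videojs.mp4.initSegment([audioTrack]);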
... | @@ -16,12 +16,29 @@ | ... | @@ -16,12 +16,29 @@ |
16 | 16 | ||
17 | var | 17 | var |
18 | TransportPacketStream, TransportParseStream, ElementaryStream, VideoSegmentStream, | 18 | TransportPacketStream, TransportParseStream, ElementaryStream, VideoSegmentStream, |
19 | Transmuxer, AacStream, H264Stream, NalByteStream, | 19 | AudioSegmentStream, Transmuxer, AacStream, H264Stream, NalByteStream, |
20 | MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4; | 20 | MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, |
21 | ADTS_SAMPLING_FREQUENCIES, mp4; | ||
21 | 22 | ||
22 | MP2T_PACKET_LENGTH = 188; // bytes | 23 | MP2T_PACKET_LENGTH = 188; // bytes |
23 | H264_STREAM_TYPE = 0x1b; | 24 | H264_STREAM_TYPE = 0x1b; |
24 | ADTS_STREAM_TYPE = 0x0f; | 25 | ADTS_STREAM_TYPE = 0x0f; |
26 | ADTS_SAMPLING_FREQUENCIES = [ | ||
27 | 96000, | ||
28 | 88200, | ||
29 | 64000, | ||
30 | 48000, | ||
31 | 44100, | ||
32 | 32000, | ||
33 | 24000, | ||
34 | 22050, | ||
35 | 16000, | ||
36 | 12000, | ||
37 | 11025, | ||
38 | 8000, | ||
39 | 7350 | ||
40 | ]; | ||
41 | |||
25 | mp4 = videojs.mp4; | 42 | mp4 = videojs.mp4; |
26 | 43 | ||
27 | /** | 44 | /** |
... | @@ -438,6 +455,11 @@ AacStream = function() { | ... | @@ -438,6 +455,11 @@ AacStream = function() { |
438 | 455 | ||
439 | // deliver the AAC frame | 456 | // deliver the AAC frame |
440 | this.trigger('data', { | 457 | this.trigger('data', { |
458 | channelcount: ((buffer[i + 1] & 1) << 2) | | ||
459 | ((buffer[i + 2] & 0xc0) >> 6), | ||
460 | samplerate: ADTS_SAMPLING_FREQUENCIES[(buffer[i + 1] & 0x3c) >> 2], | ||
461 | // assume ISO/IEC 14496-12 AudioSampleEntry default of 16 | ||
462 | samplesize: 16, | ||
441 | data: buffer.subarray(i + 6, i + frameLength - 1) | 463 | data: buffer.subarray(i + 6, i + frameLength - 1) |
442 | }); | 464 | }); |
443 | 465 | ||
... | @@ -457,6 +479,62 @@ AacStream = function() { | ... | @@ -457,6 +479,62 @@ AacStream = function() { |
457 | AacStream.prototype = new videojs.Hls.Stream(); | 479 | AacStream.prototype = new videojs.Hls.Stream(); |
458 | 480 | ||
459 | /** | 481 | /** |
482 | * Constructs a single-track, ISO BMFF media segment from AAC data | ||
483 | * events. The output of this stream can be fed to a SourceBuffer | ||
484 | * configured with a suitable initialization segment. | ||
485 | */ | ||
486 | // TODO: share common code with VideoSegmentStream | ||
487 | AudioSegmentStream = function(track) { | ||
488 | var aacFrames = [], aacFramesLength = 0, sequenceNumber = 0; | ||
489 | AudioSegmentStream.prototype.init.call(this); | ||
490 | |||
491 | this.push = function(data) { | ||
492 | // buffer audio data until end() is called | ||
493 | aacFrames.push(data); | ||
494 | aacFramesLength += data.data.byteLength; | ||
495 | }; | ||
496 | |||
497 | this.end = function() { | ||
498 | var boxes, currentFrame, data, sample, i = 0, mdat, moof; | ||
499 | // return early if no audio data has been observed | ||
500 | if (aacFramesLength === 0) { | ||
501 | return; | ||
502 | } | ||
503 | |||
504 | // concatenate the audio data to construct the mdat | ||
505 | data = new Uint8Array(aacFramesLength); | ||
506 | track.samples = []; | ||
507 | while (aacFrames.length) { | ||
508 | currentFrame = aacFrames[0]; | ||
509 | sample = { | ||
510 | size: currentFrame.data.byteLength, | ||
511 | duration: 1024 // FIXME calculate for realz | ||
512 | }; | ||
513 | track.samples.push(sample); | ||
514 | |||
515 | data.set(currentFrame.data, i); | ||
516 | i += currentFrame.data.byteLength; | ||
517 | |||
518 | aacFrames.shift(); | ||
519 | } | ||
520 | aacFramesLength = 0; | ||
521 | mdat = mp4.mdat(data); | ||
522 | |||
523 | moof = mp4.moof(sequenceNumber, [track]); | ||
524 | boxes = new Uint8Array(moof.byteLength + mdat.byteLength); | ||
525 | |||
526 | // bump the sequence number for next time | ||
527 | sequenceNumber++; | ||
528 | |||
529 | boxes.set(moof); | ||
530 | boxes.set(mdat, moof.byteLength); | ||
531 | |||
532 | this.trigger('data', boxes); | ||
533 | }; | ||
534 | }; | ||
535 | AudioSegmentStream.prototype = new videojs.Hls.Stream(); | ||
536 | |||
537 | /** | ||
460 | * Accepts a NAL unit byte stream and unpacks the embedded NAL units. | 538 | * Accepts a NAL unit byte stream and unpacks the embedded NAL units. |
461 | */ | 539 | */ |
462 | NalByteStream = function() { | 540 | NalByteStream = function() { |
... | @@ -539,7 +617,7 @@ NalByteStream = function() { | ... | @@ -539,7 +617,7 @@ NalByteStream = function() { |
539 | 617 | ||
540 | this.end = function() { | 618 | this.end = function() { |
541 | // deliver the last buffered NAL unit | 619 | // deliver the last buffered NAL unit |
542 | if (buffer.byteLength > 3) { | 620 | if (buffer && buffer.byteLength > 3) { |
543 | this.trigger('data', buffer.subarray(syncPoint + 3)); | 621 | this.trigger('data', buffer.subarray(syncPoint + 3)); |
544 | } | 622 | } |
545 | }; | 623 | }; |
... | @@ -763,12 +841,19 @@ VideoSegmentStream = function(track) { | ... | @@ -763,12 +841,19 @@ VideoSegmentStream = function(track) { |
763 | this.end = function() { | 841 | this.end = function() { |
764 | var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample; | 842 | var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample; |
765 | 843 | ||
844 | // return early if no video data has been observed | ||
845 | if (nalUnitsLength === 0) { | ||
846 | return; | ||
847 | } | ||
848 | |||
766 | // concatenate the video data and construct the mdat | 849 | // concatenate the video data and construct the mdat |
767 | // first, we have to build the index from byte locations to | 850 | // first, we have to build the index from byte locations to |
768 | // samples (that is, frames) in the video data | 851 | // samples (that is, frames) in the video data |
769 | data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length)); | 852 | data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length)); |
770 | view = new DataView(data.buffer); | 853 | view = new DataView(data.buffer); |
771 | track.samples = []; | 854 | track.samples = []; |
855 | |||
856 | // see ISO/IEC 14496-12:2012, section 8.6.4.3 | ||
772 | sample = { | 857 | sample = { |
773 | size: 0, | 858 | size: 0, |
774 | flags: { | 859 | flags: { |
... | @@ -853,11 +938,14 @@ VideoSegmentStream.prototype = new videojs.Hls.Stream(); | ... | @@ -853,11 +938,14 @@ VideoSegmentStream.prototype = new videojs.Hls.Stream(); |
853 | Transmuxer = function() { | 938 | Transmuxer = function() { |
854 | var | 939 | var |
855 | self = this, | 940 | self = this, |
856 | track, | 941 | videoTrack, |
942 | audioTrack, | ||
857 | config, | 943 | config, |
858 | pps, | 944 | pps, |
859 | 945 | ||
860 | packetStream, parseStream, elementaryStream, aacStream, h264Stream, videoSegmentStream; | 946 | packetStream, parseStream, elementaryStream, |
947 | aacStream, h264Stream, | ||
948 | videoSegmentStream, audioSegmentStream; | ||
861 | 949 | ||
862 | Transmuxer.prototype.init.call(this); | 950 | Transmuxer.prototype.init.call(this); |
863 | 951 | ||
... | @@ -880,51 +968,78 @@ Transmuxer = function() { | ... | @@ -880,51 +968,78 @@ Transmuxer = function() { |
880 | !config) { | 968 | !config) { |
881 | config = data.config; | 969 | config = data.config; |
882 | 970 | ||
883 | track.width = config.width; | 971 | videoTrack.width = config.width; |
884 | track.height = config.height; | 972 | videoTrack.height = config.height; |
885 | track.sps = [data.data]; | 973 | videoTrack.sps = [data.data]; |
886 | track.profileIdc = config.profileIdc; | 974 | videoTrack.profileIdc = config.profileIdc; |
887 | track.levelIdc = config.levelIdc; | 975 | videoTrack.levelIdc = config.levelIdc; |
888 | track.profileCompatibility = config.profileCompatibility; | 976 | videoTrack.profileCompatibility = config.profileCompatibility; |
889 | 977 | ||
890 | // generate an init segment once all the metadata is available | 978 | // generate an init segment once all the metadata is available |
891 | if (pps) { | 979 | if (pps) { |
892 | self.trigger('data', { | 980 | self.trigger('data', { |
893 | data: videojs.mp4.initSegment([track]) | 981 | type: 'video', |
982 | data: videojs.mp4.initSegment([videoTrack]) | ||
894 | }); | 983 | }); |
895 | } | 984 | } |
896 | } | 985 | } |
897 | if (data.nalUnitType === 'pic_parameter_set_rbsp' && | 986 | if (data.nalUnitType === 'pic_parameter_set_rbsp' && |
898 | !pps) { | 987 | !pps) { |
899 | pps = data.data; | 988 | pps = data.data; |
900 | track.pps = [data.data]; | 989 | videoTrack.pps = [data.data]; |
901 | 990 | ||
902 | if (config) { | 991 | if (config) { |
903 | self.trigger('data', { | 992 | self.trigger('data', { |
904 | data: videojs.mp4.initSegment([track]) | 993 | type: 'video', |
994 | data: videojs.mp4.initSegment([videoTrack]) | ||
905 | }); | 995 | }); |
906 | } | 996 | } |
907 | } | 997 | } |
908 | }); | 998 | }); |
909 | // hook up the video segment stream once track metadata is delivered | 999 | // generate an init segment based on the first audio sample |
910 | elementaryStream.on('data', function(data) { | 1000 | aacStream.on('data', function(data) { |
911 | var i, triggerData = function(segment) { | 1001 | if (audioTrack && audioTrack.channelcount === undefined) { |
1002 | audioTrack.channelcount = data.channelcount; | ||
1003 | audioTrack.samplerate = data.samplerate; | ||
1004 | audioTrack.samplesize = data.samplesize; | ||
912 | self.trigger('data', { | 1005 | self.trigger('data', { |
913 | data: segment | 1006 | type: 'audio', |
1007 | data: videojs.mp4.initSegment([audioTrack]) | ||
914 | }); | 1008 | }); |
1009 | } | ||
1010 | }); | ||
1011 | // hook up the segment streams once track metadata is delivered | ||
1012 | elementaryStream.on('data', function(data) { | ||
1013 | var i, triggerData = function(type) { | ||
1014 | return function(segment) { | ||
1015 | self.trigger('data', { | ||
1016 | type: type, | ||
1017 | data: segment | ||
1018 | }); | ||
1019 | }; | ||
915 | }; | 1020 | }; |
916 | if (data.type === 'metadata') { | 1021 | if (data.type === 'metadata') { |
917 | i = data.tracks.length; | 1022 | i = data.tracks.length; |
1023 | |||
1024 | // scan the tracks listed in the metadata | ||
918 | while (i--) { | 1025 | while (i--) { |
919 | if (data.tracks[i].type === 'video') { | 1026 | |
920 | track = data.tracks[i]; | 1027 | // hook up the video segment stream to the first track with h264 data |
921 | if (!videoSegmentStream) { | 1028 | if (data.tracks[i].type === 'video' && !videoSegmentStream) { |
922 | videoSegmentStream = new VideoSegmentStream(track); | 1029 | videoTrack = data.tracks[i]; |
923 | h264Stream.pipe(videoSegmentStream); | 1030 | videoSegmentStream = new VideoSegmentStream(videoTrack); |
924 | videoSegmentStream.on('data', triggerData); | 1031 | h264Stream.pipe(videoSegmentStream); |
925 | } | 1032 | videoSegmentStream.on('data', triggerData('video')); |
926 | break; | 1033 | break; |
927 | } | 1034 | } |
1035 | |||
1036 | // hook up the audio segment stream to the first track with aac data | ||
1037 | if (data.tracks[i].type === 'audio' && !audioSegmentStream) { | ||
1038 | audioTrack = data.tracks[i]; | ||
1039 | audioSegmentStream = new AudioSegmentStream(audioTrack); | ||
1040 | aacStream.pipe(audioSegmentStream); | ||
1041 | audioSegmentStream.on('data', triggerData('audio')); | ||
1042 | } | ||
928 | } | 1043 | } |
929 | } | 1044 | } |
930 | }); | 1045 | }); |
... | @@ -938,6 +1053,7 @@ Transmuxer = function() { | ... | @@ -938,6 +1053,7 @@ Transmuxer = function() { |
938 | elementaryStream.end(); | 1053 | elementaryStream.end(); |
939 | h264Stream.end(); | 1054 | h264Stream.end(); |
940 | videoSegmentStream.end(); | 1055 | videoSegmentStream.end(); |
1056 | audioSegmentStream.end(); | ||
941 | }; | 1057 | }; |
942 | }; | 1058 | }; |
943 | Transmuxer.prototype = new videojs.Hls.Stream(); | 1059 | Transmuxer.prototype = new videojs.Hls.Stream(); | ... | ... |
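The channelcount and samplerate that AacStream now attaches to each frame come straight out of the ADTS fixed header: the sampling frequency index occupies four bits of the third header byte (looked up in the ADTS_SAMPLING_FREQUENCIES table added above, referenced here by name for clarity), and channel_configuration is a three-bit field split across the third and fourth bytes. A worked sketch using the header bytes from the updated unit test, with `i` pointing at the second sync byte as the payload offset of i + 6 implies:

// ADTS frame taken from the 'generates AAC frame events' test below
var buffer = new Uint8Array([
      0xff, 0xf1,       // syncword, MPEG-4, layer 0, protection absent
      0x10,             // profile: AAC Main, samplingFrequencyIndex: 4
      0xbc, 0x01, 0x20, // channel_configuration: 2, frame_length: 9
      0x00,             // buffer fullness, one AAC frame per ADTS frame
      0x12, 0x34        // AAC payload
    ]),
    i = 1,
    channelcount = ((buffer[i + 1] & 1) << 2) |
      ((buffer[i + 2] & 0xc0) >> 6),          // (0 << 2) | 2 -> 2
    samplerate = ADTS_SAMPLING_FREQUENCIES[
      (buffer[i + 1] & 0x3c) >> 2             // (0x10 & 0x3c) >> 2 -> 4
    ];                                        // index 4 -> 44100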
... | @@ -586,6 +586,75 @@ test('can parse a video stsd', function() { | ... | @@ -586,6 +586,75 @@ test('can parse a video stsd', function() { |
586 | }]); | 586 | }]); |
587 | }); | 587 | }); |
588 | 588 | ||
589 | test('can parse an audio stsd', function() { | ||
590 | var data = box('stsd', | ||
591 | 0x00, // version 0 | ||
592 | 0x00, 0x00, 0x00, // flags | ||
593 | 0x00, 0x00, 0x00, 0x01, // entry_count | ||
594 | box('mp4a', | ||
595 | 0x00, 0x00, 0x00, | ||
596 | 0x00, 0x00, 0x00, // reserved | ||
597 | 0x00, 0x01, // data_reference_index | ||
598 | 0x00, 0x00, 0x00, 0x00, | ||
599 | 0x00, 0x00, 0x00, 0x00, // reserved | ||
600 | 0x00, 0x02, // channelcount | ||
601 | 0x00, 0x10, // samplesize | ||
602 | 0x00, 0x00, // pre_defined | ||
603 | 0x00, 0x00, // reserved | ||
604 | 0xbb, 0x80, 0x00, 0x00, // samplerate, fixed-point 16.16 | ||
605 | box('esds', | ||
606 | 0x00, // version 0 | ||
607 | 0x00, 0x00, 0x00, // flags | ||
608 | 0x03, // tag, ES_DescrTag | ||
609 | 0x00, // length | ||
610 | 0x00, 0x01, // ES_ID | ||
611 | 0x00, // streamDependenceFlag, URL_Flag, reserved, streamPriority | ||
612 | |||
613 | // DecoderConfigDescriptor | ||
614 | 0x04, // tag, DecoderConfigDescrTag | ||
615 | 0x0d, // length | ||
616 | 0x40, // objectProfileIndication, AAC Main | ||
617 | 0x15, // streamType, AudioStream. upstream, reserved | ||
618 | 0x00, 0x00, 0xff, // bufferSizeDB | ||
619 | 0x00, 0x00, 0x00, 0xff, // maxBitrate | ||
620 | 0x00, 0x00, 0x00, 0xaa, // avgBitrate | ||
621 | |||
622 | // DecoderSpecificInfo | ||
623 | 0x05, // tag, DecoderSpecificInfoTag | ||
624 | 0x02, // length | ||
625 | 0x11, 0x90, 0x06, 0x01, 0x02))); // decoder specific info | ||
626 | |||
627 | deepEqual(videojs.inspectMp4(new Uint8Array(data)), [{ | ||
628 | version: 0, | ||
629 | flags: new Uint8Array([0, 0, 0]), | ||
630 | type: 'stsd', | ||
631 | size: 91, | ||
632 | sampleDescriptions: [{ | ||
633 | type: 'mp4a', | ||
634 | dataReferenceIndex: 1, | ||
635 | channelcount: 2, | ||
636 | samplesize: 16, | ||
637 | samplerate: 48000, | ||
638 | size: 75, | ||
639 | streamDescriptor: { | ||
640 | type: 'esds', | ||
641 | version: 0, | ||
642 | size: 39, | ||
643 | flags: new Uint8Array([0, 0, 0]), | ||
644 | esId: 1, | ||
645 | streamPriority: 0, | ||
646 | decoderConfig: { | ||
647 | objectProfileIndication: 0x40, | ||
648 | streamType: 0x05, | ||
649 | bufferSize: 0xff, | ||
650 | maxBitrate: 0xff, | ||
651 | avgBitrate: 0xaa | ||
652 | } | ||
653 | } | ||
654 | }] | ||
655 | }], 'parsed an audio stsd'); | ||
656 | }); | ||
657 | |||
589 | test('can parse an styp', function() { | 658 | test('can parse an styp', function() { |
590 | deepEqual(videojs.inspectMp4(new Uint8Array(box('styp', | 659 | deepEqual(videojs.inspectMp4(new Uint8Array(box('styp', |
591 | 0x61, 0x76, 0x63, 0x31, // major brand | 660 | 0x61, 0x76, 0x63, 0x31, // major brand |
... | @@ -845,6 +914,24 @@ test('can parse a sidx', function(){ | ... | @@ -845,6 +914,24 @@ test('can parse a sidx', function(){ |
845 | }]); | 914 | }]); |
846 | }); | 915 | }); |
847 | 916 | ||
917 | test('can parse an smhd', function() { | ||
918 | var data = box('smhd', | ||
919 | 0x00, // version | ||
920 | 0x00, 0x00, 0x00, // flags | ||
921 | 0x00, 0xff, // balance, fixed-point 8.8 | ||
922 | 0x00, 0x00); // reserved | ||
923 | |||
924 | deepEqual(videojs.inspectMp4(new Uint8Array(data)), | ||
925 | [{ | ||
926 | type: 'smhd', | ||
927 | size: 16, | ||
928 | version: 0, | ||
929 | flags: new Uint8Array([0, 0, 0]), | ||
930 | balance: 0xff / Math.pow(2, 8) | ||
931 | }], | ||
932 | 'parsed an smhd'); | ||
933 | }); | ||
934 | |||
848 | test('can parse a tfdt', function() { | 935 | test('can parse a tfdt', function() { |
849 | var data = box('tfdt', | 936 | var data = box('tfdt', |
850 | 0x00, // version | 937 | 0x00, // version | ... | ... |
... | @@ -129,6 +129,27 @@ var | ... | @@ -129,6 +129,27 @@ var |
129 | avgBitrate: view.getUint32(8) | 129 | avgBitrate: view.getUint32(8) |
130 | }; | 130 | }; |
131 | }, | 131 | }, |
132 | esds: function(data) { | ||
133 | return { | ||
134 | version: data[0], | ||
135 | flags: new Uint8Array(data.subarray(1, 4)), | ||
136 | esId: (data[6] << 8) | data[7], | ||
137 | streamPriority: data[8] & 0x1f, | ||
138 | decoderConfig: { | ||
139 | objectProfileIndication: data[11], | ||
140 | streamType: (data[12] >>> 2) & 0x3f, | ||
141 | bufferSize: (data[13] << 16) | (data[14] << 8) | data[15], | ||
142 | maxBitrate: (data[16] << 24) | | ||
143 | (data[17] << 16) | | ||
144 | (data[18] << 8) | | ||
145 | data[19], | ||
146 | avgBitrate: (data[20] << 24) | | ||
147 | (data[21] << 16) | | ||
148 | (data[22] << 8) | | ||
149 | data[23] | ||
150 | } | ||
151 | }; | ||
152 | }, | ||
132 | ftyp: function(data) { | 153 | ftyp: function(data) { |
133 | var | 154 | var |
134 | view = new DataView(data.buffer, data.byteOffset, data.byteLength), | 155 | view = new DataView(data.buffer, data.byteOffset, data.byteLength), |
... | @@ -247,6 +268,30 @@ var | ... | @@ -247,6 +268,30 @@ var |
247 | boxes: videojs.inspectMp4(data) | 268 | boxes: videojs.inspectMp4(data) |
248 | }; | 269 | }; |
249 | }, | 270 | }, |
271 | // codingname, not a first-class box type. stsd entries share the | ||
272 | // same format as real boxes so the parsing infrastructure can be | ||
273 | // shared | ||
274 | mp4a: function(data) { | ||
275 | var | ||
276 | view = new DataView(data.buffer, data.byteOffset, data.byteLength), | ||
277 | result = { | ||
278 | // 6 bytes reserved | ||
279 | dataReferenceIndex: view.getUint16(6), | ||
280 | // 4 + 4 bytes reserved | ||
281 | channelcount: view.getUint16(16), | ||
282 | samplesize: view.getUint16(18), | ||
283 | // 2 bytes pre_defined | ||
284 | // 2 bytes reserved | ||
285 | samplerate: view.getUint16(24) + (view.getUint16(26) / 65536) | ||
286 | }; | ||
287 | |||
288 | // if there are more bytes to process, assume this is an ISO/IEC | ||
289 | // 14496-14 MP4AudioSampleEntry and parse the ESDBox | ||
290 | if (data.byteLength > 28) { | ||
291 | result.streamDescriptor = videojs.inspectMp4(data.subarray(28))[0]; | ||
292 | } | ||
293 | return result; | ||
294 | }, | ||
250 | moof: function(data) { | 295 | moof: function(data) { |
251 | return { | 296 | return { |
252 | boxes: videojs.inspectMp4(data) | 297 | boxes: videojs.inspectMp4(data) |
... | @@ -357,6 +402,13 @@ var | ... | @@ -357,6 +402,13 @@ var |
357 | 402 | ||
358 | return result; | 403 | return result; |
359 | }, | 404 | }, |
405 | smhd: function(data) { | ||
406 | return { | ||
407 | version: data[0], | ||
408 | flags: new Uint8Array(data.subarray(1, 4)), | ||
409 | balance: data[4] + (data[5] / 256) | ||
410 | }; | ||
411 | }, | ||
360 | stbl: function(data) { | 412 | stbl: function(data) { |
361 | return { | 413 | return { |
362 | boxes: videojs.inspectMp4(data) | 414 | boxes: videojs.inspectMp4(data) | ... | ... |
... | @@ -181,8 +181,8 @@ | ... | @@ -181,8 +181,8 @@ |
181 | 181 | ||
182 | mediaSource.addEventListener('sourceopen', function() { | 182 | mediaSource.addEventListener('sourceopen', function() { |
183 | var | 183 | var |
184 | buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'), | 184 | // buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'); |
185 | one = false; | 185 | buffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2'); |
186 | buffer.addEventListener('updatestart', logevent); | 186 | buffer.addEventListener('updatestart', logevent); |
187 | buffer.addEventListener('updateend', logevent); | 187 | buffer.addEventListener('updateend', logevent); |
188 | buffer.addEventListener('error', logevent); | 188 | buffer.addEventListener('error', logevent); |
... | @@ -211,27 +211,43 @@ | ... | @@ -211,27 +211,43 @@ |
211 | var segment = new Uint8Array(reader.result), | 211 | var segment = new Uint8Array(reader.result), |
212 | transmuxer = new videojs.mp2t.Transmuxer(), | 212 | transmuxer = new videojs.mp2t.Transmuxer(), |
213 | events = [], | 213 | events = [], |
214 | i = 0, | ||
215 | bytesLength = 0, | ||
216 | init = false, | ||
214 | bytes, | 217 | bytes, |
215 | hex = ''; | 218 | hex = ''; |
216 | 219 | ||
217 | transmuxer.on('data', function(data) { | 220 | transmuxer.on('data', function(data) { |
218 | if (data) { | 221 | if (data && data.type === 'audio') { |
219 | events.push(data.data); | 222 | events.push(data.data); |
223 | bytesLength += data.data.byteLength; | ||
224 | |||
225 | // XXX Media Sources Testing | ||
226 | if (!init) { | ||
227 | vjsParsed = videojs.inspectMp4(data.data); | ||
228 | console.log('appended tmuxed output'); | ||
229 | window.vjsSourceBuffer.appendBuffer(data.data); | ||
230 | init = true; | ||
231 | } | ||
220 | } | 232 | } |
221 | }); | 233 | }); |
222 | transmuxer.push(segment); | 234 | transmuxer.push(segment); |
223 | transmuxer.end(); | 235 | transmuxer.end(); |
224 | 236 | ||
225 | bytes = new Uint8Array(events[0].byteLength + events[1].byteLength); | 237 | bytes = new Uint8Array(bytesLength); |
226 | bytes.set(events[0]); | 238 | i = 0; |
227 | bytes.set(events[1], events[0].byteLength); | 239 | while (events.length) { |
240 | bytes.set(events[0], i); | ||
241 | i += events[0].byteLength; | ||
242 | events.shift(); | ||
243 | } | ||
228 | 244 | ||
229 | vjsParsed = videojs.inspectMp4(bytes); | 245 | // vjsParsed = videojs.inspectMp4(bytes); |
230 | console.log('transmuxed', vjsParsed); | 246 | console.log('transmuxed', videojs.inspectMp4(bytes)); |
231 | diffParsed(); | 247 | diffParsed(); |
232 | 248 | ||
233 | // clear old box info | 249 | // clear old box info |
234 | vjsBoxes.innerHTML = stringify(vjsParsed, null, ' '); | 250 | vjsBoxes.innerHTML = stringify(videojs.inspectMp4(bytes), null, ' '); |
235 | 251 | ||
236 | // write out the result | 252 | // write out the result |
237 | hex += '<pre>'; | 253 | hex += '<pre>'; |
... | @@ -263,8 +279,7 @@ | ... | @@ -263,8 +279,7 @@ |
263 | workingOutput.innerHTML = hex; | 279 | workingOutput.innerHTML = hex; |
264 | 280 | ||
265 | // XXX Media Sources Testing | 281 | // XXX Media Sources Testing |
266 | window.vjsSourceBuffer.appendBuffer(bytes); | 282 | // window.vjsSourceBuffer.appendBuffer(bytes); |
267 | console.log('appended bytes'); | ||
268 | }); | 283 | }); |
269 | reader.readAsArrayBuffer(this.files[0]); | 284 | reader.readAsArrayBuffer(this.files[0]); |
270 | }, false); | 285 | }, false); | ... | ... |
... | @@ -76,27 +76,41 @@ | ... | @@ -76,27 +76,41 @@ |
76 | 76 | ||
77 | // setup the media source | 77 | // setup the media source |
78 | mediaSource.addEventListener('sourceopen', function() { | 78 | mediaSource.addEventListener('sourceopen', function() { |
79 | var buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'), | 79 | var videoBuffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'), |
80 | audioBuffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2'), | ||
80 | transmuxer = new videojs.mp2t.Transmuxer(), | 81 | transmuxer = new videojs.mp2t.Transmuxer(), |
81 | segments = []; | 82 | videoSegments = [], |
83 | audioSegments = []; | ||
82 | 84 | ||
83 | // expose the machinery for debugging | 85 | // expose the machinery for debugging |
84 | window.vjsMediaSource = mediaSource; | 86 | window.vjsMediaSource = mediaSource; |
85 | window.vjsSourceBuffer = buffer; | 87 | window.vjsSourceBuffer = videoBuffer; |
86 | window.vjsVideo = demo; | 88 | window.vjsVideo = demo; |
87 | 89 | ||
88 | // transmux the MPEG-TS data to BMFF segments | 90 | // transmux the MPEG-TS data to BMFF segments |
89 | transmuxer.on('data', function(segment) { | 91 | transmuxer.on('data', function(segment) { |
90 | segments.push(segment); | 92 | if (segment.type === 'video') { |
93 | videoSegments.push(segment); | ||
94 | } else { | ||
95 | audioSegments.push(segment); | ||
96 | } | ||
91 | }); | 97 | }); |
92 | transmuxer.push(hazeVideo); | 98 | transmuxer.push(hazeVideo); |
93 | transmuxer.end(); | 99 | transmuxer.end(); |
94 | 100 | ||
95 | // buffer up the video data | 101 | // buffer up the video data |
96 | buffer.appendBuffer(segments.shift().data); | 102 | videoBuffer.appendBuffer(videoSegments.shift().data); |
97 | buffer.addEventListener('updateend', function() { | 103 | videoBuffer.addEventListener('updateend', function() { |
98 | if (segments.length) { | 104 | if (videoSegments.length) { |
99 | buffer.appendBuffer(segments.shift().data); | 105 | videoBuffer.appendBuffer(videoSegments.shift().data); |
106 | } | ||
107 | }); | ||
108 | |||
109 | // buffer up the audio data | ||
110 | audioBuffer.appendBuffer(audioSegments.shift().data); | ||
111 | audioBuffer.addEventListener('updateend', function() { | ||
112 | if (audioSegments.length) { | ||
113 | audioBuffer.appendBuffer(audioSegments.shift().data); | ||
100 | } | 114 | } |
101 | }); | 115 | }); |
102 | }); | 116 | }); | ... | ... |
... | @@ -94,8 +94,10 @@ | ... | @@ -94,8 +94,10 @@ |
94 | var onMediaSourceOpen = function() { | 94 | var onMediaSourceOpen = function() { |
95 | console.log('on media open'); | 95 | console.log('on media open'); |
96 | ms.removeEventListener('sourceopen', onMediaSourceOpen); | 96 | ms.removeEventListener('sourceopen', onMediaSourceOpen); |
97 | var sourceBuffer = ms.addSourceBuffer('video/mp4;codecs="avc1.4D400D"'); | 97 | var videoBuffer = ms.addSourceBuffer('video/mp4;codecs="avc1.4D400D"'); |
98 | sourceBuffer.appendBuffer(bytes); | 98 | videoBuffer.appendBuffer(bytes); |
99 | |||
100 | var audioBuffer = ms.addSourceBuffer('audio/mp4;codecs=mp4a.40.2'); | ||
99 | }; | 101 | }; |
100 | 102 | ||
101 | ms.addEventListener('sourceopen', onMediaSourceOpen); | 103 | ms.addEventListener('sourceopen', onMediaSourceOpen); | ... | ... |
... | @@ -47,7 +47,9 @@ var | ... | @@ -47,7 +47,9 @@ var |
47 | validateTrack, | 47 | validateTrack, |
48 | validateTrackFragment, | 48 | validateTrackFragment, |
49 | 49 | ||
50 | videoPes; | 50 | transportPacket, |
51 | videoPes, | ||
52 | audioPes; | ||
51 | 53 | ||
52 | module('MP2T Packet Stream', { | 54 | module('MP2T Packet Stream', { |
53 | setup: function() { | 55 | setup: function() { |
... | @@ -397,15 +399,22 @@ test('parses an elementary stream packet with a pts and dts', function() { | ... | @@ -397,15 +399,22 @@ test('parses an elementary stream packet with a pts and dts', function() { |
397 | equal(2 / 90, packet.dts, 'parsed the dts'); | 399 | equal(2 / 90, packet.dts, 'parsed the dts'); |
398 | }); | 400 | }); |
399 | 401 | ||
400 | // helper function to create video PES packets | 402 | /** |
401 | videoPes = function(data, first) { | 403 | * Helper function to create transport stream PES packets |
404 | * @param pid {uint8} - the program identifier (PID) | ||
405 | * @param data {arraylike} - the payload bytes | ||
406 | * @param first {boolean} - true if this PES should be a payload | ||
407 | * unit start | ||
408 | */ | ||
409 | transportPacket = function(pid, data, first) { | ||
402 | var | 410 | var |
403 | adaptationFieldLength = 188 - data.length - (first ? 18 : 17), | 411 | adaptationFieldLength = 188 - data.length - (first ? 15 : 14), |
412 | // transport_packet(), Rec. ITU-T H.222.0, Table 2-2 | ||
404 | result = [ | 413 | result = [ |
405 | // sync byte | 414 | // sync byte |
406 | 0x47, | 415 | 0x47, |
407 | // tei:0 pusi:1 tp:0 pid:0 0000 0001 0001 | 416 | // tei:0 pusi:1 tp:0 pid:0 0000 0001 0001 |
408 | 0x40, 0x11, | 417 | 0x40, pid, |
409 | // tsc:01 afc:11 cc:0000 | 418 | // tsc:01 afc:11 cc:0000 |
410 | 0x70 | 419 | 0x70 |
411 | ].concat([ | 420 | ].concat([ |
... | @@ -422,6 +431,7 @@ videoPes = function(data, first) { | ... | @@ -422,6 +431,7 @@ videoPes = function(data, first) { |
422 | result.push(0xff); | 431 | result.push(0xff); |
423 | } | 432 | } |
424 | 433 | ||
434 | // PES_packet(), Rec. ITU-T H.222.0, Table 2-21 | ||
425 | result = result.concat([ | 435 | result = result.concat([ |
426 | // pscp:0000 0000 0000 0000 0000 0001 | 436 | // pscp:0000 0000 0000 0000 0000 0001 |
427 | 0x00, 0x00, 0x01, | 437 | 0x00, 0x00, 0x01, |
... | @@ -437,14 +447,41 @@ videoPes = function(data, first) { | ... | @@ -437,14 +447,41 @@ videoPes = function(data, first) { |
437 | if (first) { | 447 | if (first) { |
438 | result.push(0x00); | 448 | result.push(0x00); |
439 | } | 449 | } |
440 | result = result.concat([ | 450 | return result.concat(data); |
451 | }; | ||
452 | |||
453 | /** | ||
454 | * Helper function to create video PES packets | ||
455 | * @param data {arraylike} - the payload bytes | ||
457 | * @param first {boolean} - true if this PES should be a payload | ||
457 | * unit start | ||
458 | */ | ||
459 | videoPes = function(data, first) { | ||
460 | return transportPacket(0x11, [ | ||
441 | // NAL unit start code | 461 | // NAL unit start code |
442 | 0x00, 0x00, 0x01 | 462 | 0x00, 0x00, 0x01 |
443 | ].concat(data)); | 463 | ].concat(data), first); |
444 | return result; | ||
445 | }; | 464 | }; |
446 | standalonePes = videoPes([0xaf, 0x01], true); | 465 | standalonePes = videoPes([0xaf, 0x01], true); |
447 | 466 | ||
467 | /** | ||
468 | * Helper function to create audio PES packets | ||
469 | * @param data {arraylike} - the payload bytes | ||
470 | * @param first {boolean} - true if this PES should be a payload | ||
471 | * unit start | ||
472 | */ | ||
473 | audioPes = function(data, first) { | ||
474 | var frameLength = data.length + 7; | ||
475 | return transportPacket(0x12, [ | ||
476 | 0xff, 0xf1, // no CRC | ||
477 | 0x10, // AAC Main, 44.1KHz | ||
478 | 0xb0 | ((frameLength & 0x1800) >> 11), // 2 channels | ||
479 | (frameLength & 0x7f8) >> 3, | ||
480 | ((frameLength & 0x07) << 5) + 7, // frame length in bytes | ||
481 | 0x00 // one AAC per ADTS frame | ||
482 | ].concat(data), first); | ||
483 | }; | ||
484 | |||
448 | test('parses an elementary stream packet without a pts or dts', function() { | 485 | test('parses an elementary stream packet without a pts or dts', function() { |
449 | 486 | ||
450 | var packet; | 487 | var packet; |
... | @@ -950,17 +987,24 @@ test('generates AAC frame events from ADTS bytes', function() { | ... | @@ -950,17 +987,24 @@ test('generates AAC frame events from ADTS bytes', function() { |
950 | aacStream.push({ | 987 | aacStream.push({ |
951 | type: 'audio', | 988 | type: 'audio', |
952 | data: new Uint8Array([ | 989 | data: new Uint8Array([ |
953 | 0xff, 0xf1, // no CRC | 990 | 0xff, 0xf1, // no CRC |
954 | 0x00, // AAC Main, 44.1KHz | 991 | 0x10, // AAC Main, 44.1KHz |
955 | 0xfc, 0x01, 0x20, // frame length 9 bytes | 992 | 0xbc, 0x01, 0x20, // 2 channels, frame length 9 bytes |
956 | 0x00, // one AAC per ADTS frame | 993 | 0x00, // one AAC per ADTS frame |
957 | 0x12, 0x34, // AAC payload | 994 | 0x12, 0x34, // AAC payload |
958 | 0x56, 0x78 // extra junk that should be ignored | 995 | 0x56, 0x78 // extra junk that should be ignored |
959 | ]) | 996 | ]) |
960 | }); | 997 | }); |
961 | 998 | ||
962 | equal(frames.length, 1, 'generated one frame'); | 999 | equal(frames.length, 1, 'generated one frame'); |
963 | deepEqual(frames[0].data, new Uint8Array([0x12, 0x34]), 'extracted AAC frame'); | 1000 | deepEqual(frames[0].data, new Uint8Array([0x12, 0x34]), 'extracted AAC frame'); |
1001 | equal(frames[0].channelcount, 2, 'parsed channelcount'); | ||
1002 | equal(frames[0].samplerate, 44100, 'parsed samplerate'); | ||
1003 | |||
1004 | // Chrome only supports 8, 16, and 32 bit sample sizes. Assuming the | ||
1005 | // default value of 16 in ISO/IEC 14496-12 AudioSampleEntry is | ||
1006 | // acceptable. | ||
1007 | equal(frames[0].samplesize, 16, 'parsed samplesize'); | ||
964 | }); | 1008 | }); |
965 | 1009 | ||
966 | // not handled: ADTS with CRC | 1010 | // not handled: ADTS with CRC |
... | @@ -972,7 +1016,7 @@ module('Transmuxer', { | ... | @@ -972,7 +1016,7 @@ module('Transmuxer', { |
972 | } | 1016 | } |
973 | }); | 1017 | }); |
974 | 1018 | ||
975 | test('generates an init segment', function() { | 1019 | test('generates a video init segment', function() { |
976 | var segments = []; | 1020 | var segments = []; |
977 | transmuxer.on('data', function(segment) { | 1021 | transmuxer.on('data', function(segment) { |
978 | segments.push(segment); | 1022 | segments.push(segment); |
... | @@ -980,16 +1024,38 @@ test('generates an init segment', function() { | ... | @@ -980,16 +1024,38 @@ test('generates an init segment', function() { |
980 | transmuxer.push(packetize(PAT)); | 1024 | transmuxer.push(packetize(PAT)); |
981 | transmuxer.push(packetize(PMT)); | 1025 | transmuxer.push(packetize(PMT)); |
982 | transmuxer.push(packetize(videoPes([ | 1026 | transmuxer.push(packetize(videoPes([ |
983 | 0x07, | 1027 | 0x08, 0x01 // pic_parameter_set_rbsp |
1028 | ], true))); | ||
1029 | transmuxer.push(packetize(videoPes([ | ||
1030 | 0x07, // seq_parameter_set_rbsp | ||
984 | 0x27, 0x42, 0xe0, 0x0b, | 1031 | 0x27, 0x42, 0xe0, 0x0b, |
985 | 0xa9, 0x18, 0x60, 0x9d, | 1032 | 0xa9, 0x18, 0x60, 0x9d, |
986 | 0x80, 0x53, 0x06, 0x01, | 1033 | 0x80, 0x53, 0x06, 0x01, |
987 | 0x06, 0xb6, 0xc2, 0xb5, | 1034 | 0x06, 0xb6, 0xc2, 0xb5, |
988 | 0xef, 0x7c, 0x04 | 1035 | 0xef, 0x7c, 0x04 |
1036 | ], false))); | ||
1037 | transmuxer.end(); | ||
1038 | |||
1039 | equal(segments.length, 2, 'generated init and media segments'); | ||
1040 | ok(segments[0].data, 'wrote data in the init segment'); | ||
1041 | equal(segments[0].type, 'video', 'video is the segment type'); | ||
1042 | }); | ||
1043 | |||
1044 | test('generates an audio init segment', function() { | ||
1045 | var segments = []; | ||
1046 | transmuxer.on('data', function(segment) { | ||
1047 | segments.push(segment); | ||
1048 | }); | ||
1049 | transmuxer.push(packetize(PAT)); | ||
1050 | transmuxer.push(packetize(PMT)); | ||
1051 | transmuxer.push(packetize(audioPes([ | ||
1052 | 0x00, 0x01 | ||
989 | ], true))); | 1053 | ], true))); |
990 | transmuxer.end(); | 1054 | transmuxer.end(); |
991 | 1055 | ||
992 | equal(segments.length, 1, 'has an init segment'); | 1056 | equal(segments.length, 2, 'generated init and media segments'); |
1057 | ok(segments[0].data, 'wrote data in the init segment'); | ||
1058 | equal(segments[0].type, 'audio', 'audio is the segment type'); | ||
993 | }); | 1059 | }); |
994 | 1060 | ||
995 | test('buffers video samples until ended', function() { | 1061 | test('buffers video samples until ended', function() { |
... | @@ -1123,20 +1189,26 @@ validateTrackFragment = function(track, segment, metadata) { | ... | @@ -1123,20 +1189,26 @@ validateTrackFragment = function(track, segment, metadata) { |
1123 | 1189 | ||
1124 | test('parses an example mp2t file and generates media segments', function() { | 1190 | test('parses an example mp2t file and generates media segments', function() { |
1125 | var | 1191 | var |
1126 | segments = [], | 1192 | videoSegments = [], |
1193 | audioSegments = [], | ||
1127 | sequenceNumber = window.Infinity, | 1194 | sequenceNumber = window.Infinity, |
1128 | i, boxes, mfhd; | 1195 | i, boxes, mfhd; |
1129 | 1196 | ||
1130 | transmuxer.on('data', function(segment) { | 1197 | transmuxer.on('data', function(segment) { |
1131 | segments.push(segment); | 1198 | if (segment.type === 'video') { |
1199 | videoSegments.push(segment); | ||
1200 | } else if (segment.type === 'audio') { | ||
1201 | audioSegments.push(segment); | ||
1202 | } | ||
1132 | }); | 1203 | }); |
1133 | transmuxer.push(window.bcSegment); | 1204 | transmuxer.push(window.bcSegment); |
1134 | transmuxer.end(); | 1205 | transmuxer.end(); |
1135 | 1206 | ||
1136 | equal(segments.length, 2, 'generated two segments'); | 1207 | equal(videoSegments.length, 2, 'generated two video segments'); |
1208 | equal(audioSegments.length, 2, 'generated two audio segments'); | ||
1137 | 1209 | ||
1138 | boxes = videojs.inspectMp4(segments[0].data); | 1210 | boxes = videojs.inspectMp4(videoSegments[0].data); |
1139 | equal(boxes.length, 2, 'init segments are composed of two boxes'); | 1211 | equal(boxes.length, 2, 'video init segments are composed of two boxes'); |
1140 | equal(boxes[0].type, 'ftyp', 'the first box is an ftyp'); | 1212 | equal(boxes[0].type, 'ftyp', 'the first box is an ftyp'); |
1141 | equal(boxes[1].type, 'moov', 'the second box is a moov'); | 1213 | equal(boxes[1].type, 'moov', 'the second box is a moov'); |
1142 | equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd'); | 1214 | equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd'); |
... | @@ -1150,9 +1222,9 @@ test('parses an example mp2t file and generates media segments', function() { | ... | @@ -1150,9 +1222,9 @@ test('parses an example mp2t file and generates media segments', function() { |
1150 | // }); | 1222 | // }); |
1151 | // equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex'); | 1223 | // equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex'); |
1152 | 1224 | ||
1153 | boxes = videojs.inspectMp4(segments[1].data); | 1225 | boxes = videojs.inspectMp4(videoSegments[1].data); |
1154 | ok(boxes.length > 0, 'media segments are not empty'); | 1226 | ok(boxes.length > 0, 'video media segments are not empty'); |
1155 | ok(boxes.length % 2 === 0, 'media segments are composed of pairs of boxes'); | 1227 | ok(boxes.length % 2 === 0, 'video media segments are composed of pairs of boxes'); |
1156 | for (i = 0; i < boxes.length; i += 2) { | 1228 | for (i = 0; i < boxes.length; i += 2) { |
1157 | equal(boxes[i].type, 'moof', 'first box is a moof'); | 1229 | equal(boxes[i].type, 'moof', 'first box is a moof'); |
1158 | equal(boxes[i].boxes.length, 2, 'the moof has two children'); | 1230 | equal(boxes[i].boxes.length, 2, 'the moof has two children'); |
... | @@ -1163,7 +1235,7 @@ test('parses an example mp2t file and generates media segments', function() { | ... | @@ -1163,7 +1235,7 @@ test('parses an example mp2t file and generates media segments', function() { |
1163 | sequenceNumber = mfhd.sequenceNumber; | 1235 | sequenceNumber = mfhd.sequenceNumber; |
1164 | 1236 | ||
1165 | equal(boxes[i + 1].type, 'mdat', 'second box is an mdat'); | 1237 | equal(boxes[i + 1].type, 'mdat', 'second box is an mdat'); |
1166 | validateTrackFragment(boxes[i].boxes[1], segments[1].data, { | 1238 | validateTrackFragment(boxes[i].boxes[1], videoSegments[1].data, { |
1167 | trackId: 256, | 1239 | trackId: 256, |
1168 | width: 388, | 1240 | width: 388, |
1169 | height: 300, | 1241 | height: 300, | ... | ... |