def510ae by David LaPalomento

Generate a valid audio initialization segment

Modify the mp4 generator to inspect audio tracks and generate a working initialization segment. Hook the audio init segment up to the mp4 transmuxing test page.
1 parent 458da175
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
4 var box, dinf, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, trak, 4 var box, dinf, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, trak,
5 tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, styp, traf, trex, trun, 5 tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, styp, traf, trex, trun,
6 types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR, 6 types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR,
7 AUDIO_HDLR, HDLR_TYPES, VMHD, DREF, STCO, STSC, STSZ, STTS, 7 AUDIO_HDLR, HDLR_TYPES, ESDS, VMHD, SMHD, DREF, STCO, STSC, STSZ, STTS,
8 Uint8Array, DataView; 8 Uint8Array, DataView;
9 9
10 Uint8Array = window.Uint8Array; 10 Uint8Array = window.Uint8Array;
...@@ -19,6 +19,7 @@ DataView = window.DataView; ...@@ -19,6 +19,7 @@ DataView = window.DataView;
19 btrt: [], 19 btrt: [],
20 dinf: [], 20 dinf: [],
21 dref: [], 21 dref: [],
22 esds: [],
22 ftyp: [], 23 ftyp: [],
23 hdlr: [], 24 hdlr: [],
24 mdat: [], 25 mdat: [],
...@@ -28,9 +29,11 @@ DataView = window.DataView; ...@@ -28,9 +29,11 @@ DataView = window.DataView;
28 minf: [], 29 minf: [],
29 moof: [], 30 moof: [],
30 moov: [], 31 moov: [],
32 mp4a: [], // codingname
31 mvex: [], 33 mvex: [],
32 mvhd: [], 34 mvhd: [],
33 sdtp: [], 35 sdtp: [],
36 smhd: [],
34 stbl: [], 37 stbl: [],
35 stco: [], 38 stco: [],
36 stsc: [], 39 stsc: [],
...@@ -109,6 +112,39 @@ DataView = window.DataView; ...@@ -109,6 +112,39 @@ DataView = window.DataView;
109 0x00, // version 0 112 0x00, // version 0
110 0x00, 0x00, 0x01 // entry_flags 113 0x00, 0x00, 0x01 // entry_flags
111 ]); 114 ]);
// Hard-coded payload of the 'esds' box (elementary stream descriptor) that
// audioSample() nests inside the mp4a sample entry. The nested descriptors
// follow the ISO/IEC 14496-1 layout: ES_Descriptor > DecoderConfigDescriptor
// > DecoderSpecificInfo, followed by an SLConfigDescriptor.
115 ESDS = new Uint8Array([
116 0x00, // version
117 0x00, 0x00, 0x00, // flags
118
119 // ES_Descriptor
120 0x03, // tag, ES_DescrTag
121 0x19, // length, 25 bytes: everything that follows in this array
122 0x00, 0x00, // ES_ID
123 0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority
124
125 // DecoderConfigDescriptor
126 0x04, // tag, DecoderConfigDescrTag
127 0x11, // length, 17 bytes: through the end of DecoderSpecificInfo
128 0x40, // objectTypeIndication, 0x40 (64)
129 0x15, // streamType (upper 6 bits) = 5, upStream = 0, reserved = 1
130 0x00, 0x06, 0x00, // bufferSizeDB, 1536
131 0x00, 0x00, 0xda, 0xc0, // maxBitrate, 56000
132 0x00, 0x00, 0xda, 0xc0, // avgBitrate, 56000
133
134 // DecoderSpecificInfo
135 0x05, // tag, DecoderSpecificInfoTag
136 0x02, // length, the two AudioSpecificConfig bytes below
137 // ISO/IEC 14496-3, AudioSpecificConfig. The 16 bits 0x11 0x90 decode as
// audioObjectType (5 bits) = 2, samplingFrequencyIndex (4 bits) = 3,
// channelConfiguration (4 bits) = 2, then three zero GASpecificConfig
// flag bits.
138 0x11, // AudioObjectType, AAC LC. High bit of samplingFrequencyIndex.
139 0x90, // rest of samplingFrequencyIndex, 3 -> 48000. channelConfig, 2 -> stereo.
// SLConfigDescriptor (ISO/IEC 14496-1): tag 0x06, length 1, predefined = 2
140 0x06, 0x01, 0x02 // SLConfigDescriptor, not part of the AudioSpecificConfig
141 ]);
// Fixed payload of the sound media header ('smhd') box. minf() writes this
// box instead of the vmhd for every track whose type is not 'video'.
142 SMHD = new Uint8Array([
143 0x00, // version
144 0x00, 0x00, 0x00, // flags
145 0x00, 0x00, // balance, 0 means centered
146 0x00, 0x00 // reserved
147 ]);
112 STCO = new Uint8Array([ 148 STCO = new Uint8Array([
113 0x00, // version 149 0x00, // version
114 0x00, 0x00, 0x00, // flags 150 0x00, 0x00, 0x00, // flags
...@@ -171,24 +207,35 @@ hdlr = function(type) { ...@@ -171,24 +207,35 @@ hdlr = function(type) {
171 mdat = function(data) { 207 mdat = function(data) {
172 return box(types.mdat, data); 208 return box(types.mdat, data);
173 }; 209 };
174 mdhd = function(duration) { 210 mdhd = function(track) {
175 return box(types.mdhd, new Uint8Array([ 211 var result = new Uint8Array([
176 0x00, // version 0 212 0x00, // version 0
177 0x00, 0x00, 0x00, // flags 213 0x00, 0x00, 0x00, // flags
178 0x00, 0x00, 0x00, 0x02, // creation_time 214 0x00, 0x00, 0x00, 0x02, // creation_time
179 0x00, 0x00, 0x00, 0x03, // modification_time 215 0x00, 0x00, 0x00, 0x03, // modification_time
180 0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second 216 0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second
181 217
182 (duration & 0xFF000000) >> 24, 218 (track.duration >>> 24),
183 (duration & 0xFF0000) >> 16, 219 (track.duration >>> 16) & 0xFF,
184 (duration & 0xFF00) >> 8, 220 (track.duration >>> 8) & 0xFF,
185 duration & 0xFF, // duration 221 track.duration & 0xFF, // duration
186 0x55, 0xc4, // 'und' language (undetermined) 222 0x55, 0xc4, // 'und' language (undetermined)
187 0x00, 0x00 223 0x00, 0x00
188 ])); 224 ]);
225
226 // Use the sample rate from the track metadata, when it is
227 // defined. The sample rate can be parsed out of an ADTS header, for
228 // instance.
229 if (track.samplerate) {
230 result[12] = (track.samplerate >>> 24);
231 result[13] = (track.samplerate >>> 16) & 0xFF;
232 result[14] = (track.samplerate >>> 8) & 0xFF;
233 result[15] = (track.samplerate) & 0xFF;
234 }
235 return box(types.mdhd, result);
189 }; 236 };
190 mdia = function(track) { 237 mdia = function(track) {
191 return box(types.mdia, mdhd(track.duration), hdlr(track.type), minf(track)); 238 return box(types.mdia, mdhd(track), hdlr(track.type), minf(track));
192 }; 239 };
193 mfhd = function(sequenceNumber) { 240 mfhd = function(sequenceNumber) {
194 return box(types.mfhd, new Uint8Array([ 241 return box(types.mfhd, new Uint8Array([
...@@ -201,7 +248,10 @@ mfhd = function(sequenceNumber) { ...@@ -201,7 +248,10 @@ mfhd = function(sequenceNumber) {
201 ])); 248 ]));
202 }; 249 };
203 minf = function(track) { 250 minf = function(track) {
204 return box(types.minf, box(types.vmhd, VMHD), dinf(), stbl(track)); 251 return box(types.minf,
252 track.type === 'video' ? box(types.vmhd, VMHD) : box(types.smhd, SMHD),
253 dinf(),
254 stbl(track));
205 }; 255 };
206 moof = function(sequenceNumber, tracks) { 256 moof = function(sequenceNumber, tracks) {
207 var 257 var
...@@ -217,7 +267,9 @@ moof = function(sequenceNumber, tracks) { ...@@ -217,7 +267,9 @@ moof = function(sequenceNumber, tracks) {
217 ].concat(trackFragments)); 267 ].concat(trackFragments));
218 }; 268 };
219 /** 269 /**
220 * @param tracks... (optional) {array} the tracks associated with this movie 270 * Returns a movie box.
271 * @param tracks {array} the tracks associated with this movie
272 * @see ISO/IEC 14496-12:2012(E), section 8.2.1
221 */ 273 */
222 moov = function(tracks) { 274 moov = function(tracks) {
223 var 275 var
...@@ -307,12 +359,20 @@ stbl = function(track) { ...@@ -307,12 +359,20 @@ stbl = function(track) {
307 box(types.stco, STCO)); 359 box(types.stco, STCO));
308 }; 360 };
309 361
310 stsd = function(track) { 362 (function() {
311 var sequenceParameterSets = [], pictureParameterSets = [], i; 363 var videoSample, audioSample;
312 364
313 if (track.type === 'audio') { 365 stsd = function(track) {
314 return box(types.stsd); 366
315 } 367 return box(types.stsd, new Uint8Array([
368 0x00, // version 0
369 0x00, 0x00, 0x00, // flags
370 0x00, 0x00, 0x00, 0x01
371 ]), track.type === 'video' ? videoSample(track) : audioSample(track));
372 };
373
374 videoSample = function(track) {
375 var sequenceParameterSets = [], pictureParameterSets = [], i;
316 376
317 // assemble the SPSs 377 // assemble the SPSs
318 for (i = 0; i < track.sps.length; i++) { 378 for (i = 0; i < track.sps.length; i++) {
...@@ -328,11 +388,7 @@ stsd = function(track) { ...@@ -328,11 +388,7 @@ stsd = function(track) {
328 pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i])); 388 pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i]));
329 } 389 }
330 390
331 return box(types.stsd, new Uint8Array([ 391 return box(types.avc1, new Uint8Array([
332 0x00, // version 0
333 0x00, 0x00, 0x00, // flags
334 0x00, 0x00, 0x00, 0x01]),
335 box(types.avc1, new Uint8Array([
336 0x00, 0x00, 0x00, 392 0x00, 0x00, 0x00,
337 0x00, 0x00, 0x00, // reserved 393 0x00, 0x00, 0x00, // reserved
338 0x00, 0x01, // data_reference_index 394 0x00, 0x01, // data_reference_index
...@@ -359,8 +415,8 @@ stsd = function(track) { ...@@ -359,8 +415,8 @@ stsd = function(track) {
359 0x00, 0x00, 0x00, 0x00, 415 0x00, 0x00, 0x00, 0x00,
360 0x00, 0x00, 0x00, // compressorname 416 0x00, 0x00, 0x00, // compressorname
361 0x00, 0x18, // depth = 24 417 0x00, 0x18, // depth = 24
362 0x11, 0x11]), // pre_defined = -1 418 0x11, 0x11 // pre_defined = -1
363 box(types.avcC, new Uint8Array([ 419 ]), box(types.avcC, new Uint8Array([
364 0x01, // configurationVersion 420 0x01, // configurationVersion
365 track.profileIdc, // AVCProfileIndication 421 track.profileIdc, // AVCProfileIndication
366 track.profileCompatibility, // profile_compatibility 422 track.profileCompatibility, // profile_compatibility
...@@ -374,16 +430,45 @@ stsd = function(track) { ...@@ -374,16 +430,45 @@ stsd = function(track) {
374 box(types.btrt, new Uint8Array([ 430 box(types.btrt, new Uint8Array([
375 0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB 431 0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB
376 0x00, 0x2d, 0xc6, 0xc0, // maxBitrate 432 0x00, 0x2d, 0xc6, 0xc0, // maxBitrate
377 0x00, 0x2d, 0xc6, 0xc0])) // avgBitrate 433 0x00, 0x2d, 0xc6, 0xc0
378 )); 434 ])) // avgBitrate
379 }; 435 );
436 };
437
/**
 * Builds the 'mp4a' sample entry for an audio track, with the
 * hard-coded esds box nested inside it.
 * @param track {object} audio track metadata; channelcount, samplesize
 * and samplerate are each written as 16-bit big-endian values
 * @return the box produced by box() for the mp4a sample entry
 */
audioSample = function(track) {
  var
    channels = track.channelcount,
    bitsPerSample = track.samplesize,
    rate = track.samplerate,
    sampleEntry;

  sampleEntry = new Uint8Array([
    // SampleEntry, ISO/IEC 14496-12
    0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, // reserved
    0x00, 0x01, // data_reference_index

    // AudioSampleEntry, ISO/IEC 14496-12
    0x00, 0x00, 0x00, 0x00, // reserved
    0x00, 0x00, 0x00, 0x00, // reserved
    (channels >> 8) & 0xff,
    channels & 0xff, // channelcount

    (bitsPerSample >> 8) & 0xff,
    bitsPerSample & 0xff, // samplesize
    0x00, 0x00, // pre_defined
    0x00, 0x00, // reserved

    // samplerate is a 16.16 fixed point number: the integer part
    // goes in the upper two bytes and the fraction is left at zero
    (rate >> 8) & 0xff,
    rate & 0xff,
    0x00, 0x00

    // MP4AudioSampleEntry, ISO/IEC 14496-14
  ]);

  return box(types.mp4a, sampleEntry, box(types.esds, ESDS));
};
464 })();
380 465
381 styp = function() { 466 styp = function() {
382 return box(types.styp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND); 467 return box(types.styp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND);
383 }; 468 };
384 469
385 tkhd = function(track) { 470 tkhd = function(track) {
386 return box(types.tkhd, new Uint8Array([ 471 var result = new Uint8Array([
387 0x00, // version 0 472 0x00, // version 0
388 0x00, 0x00, 0x07, // flags 473 0x00, 0x00, 0x07, // flags
389 0x00, 0x00, 0x00, 0x00, // creation_time 474 0x00, 0x00, 0x00, 0x00, // creation_time
...@@ -401,7 +486,7 @@ tkhd = function(track) { ...@@ -401,7 +486,7 @@ tkhd = function(track) {
401 0x00, 0x00, 0x00, 0x00, // reserved 486 0x00, 0x00, 0x00, 0x00, // reserved
402 0x00, 0x00, // layer 487 0x00, 0x00, // layer
403 0x00, 0x00, // alternate_group 488 0x00, 0x00, // alternate_group
404 0x00, 0x00, // non-audio track volume 489 0x01, 0x00, // non-audio track volume
405 0x00, 0x00, // reserved 490 0x00, 0x00, // reserved
406 0x00, 0x01, 0x00, 0x00, 491 0x00, 0x01, 0x00, 0x00,
407 0x00, 0x00, 0x00, 0x00, 492 0x00, 0x00, 0x00, 0x00,
...@@ -418,7 +503,9 @@ tkhd = function(track) { ...@@ -418,7 +503,9 @@ tkhd = function(track) {
418 (track.height & 0xFF00) >> 8, 503 (track.height & 0xFF00) >> 8,
419 track.height & 0xFF, 504 track.height & 0xFF,
420 0x00, 0x00 // height 505 0x00, 0x00 // height
421 ])); 506 ]);
507
508 return box(types.tkhd, result);
422 }; 509 };
423 510
424 traf = function(track) { 511 traf = function(track) {
...@@ -461,7 +548,7 @@ trak = function(track) { ...@@ -461,7 +548,7 @@ trak = function(track) {
461 }; 548 };
462 549
463 trex = function(track) { 550 trex = function(track) {
464 return box(types.trex, new Uint8Array([ 551 var result = new Uint8Array([
465 0x00, // version 0 552 0x00, // version 0
466 0x00, 0x00, 0x00, // flags 553 0x00, 0x00, 0x00, // flags
467 (track.id & 0xFF000000) >> 24, 554 (track.id & 0xFF000000) >> 24,
...@@ -472,7 +559,16 @@ trex = function(track) { ...@@ -472,7 +559,16 @@ trex = function(track) {
472 0x00, 0x00, 0x00, 0x00, // default_sample_duration 559 0x00, 0x00, 0x00, 0x00, // default_sample_duration
473 0x00, 0x00, 0x00, 0x00, // default_sample_size 560 0x00, 0x00, 0x00, 0x00, // default_sample_size
474 0x00, 0x01, 0x00, 0x01 // default_sample_flags 561 0x00, 0x01, 0x00, 0x01 // default_sample_flags
475 ])); 562 ]);
563 // the last two bytes of default_sample_flags is the sample
564 // degradation priority, a hint about the importance of this sample
565 // relative to others. Lower the degradation priority for all sample
566 // types other than video.
567 if (track.type !== 'video') {
568 result[result.length - 1] = 0x00;
569 }
570
571 return box(types.trex, result);
476 }; 572 };
477 573
478 trun = function(track, offset) { 574 trun = function(track, offset) {
......
...@@ -16,12 +16,29 @@ ...@@ -16,12 +16,29 @@
16 16
17 var 17 var
18 TransportPacketStream, TransportParseStream, ElementaryStream, VideoSegmentStream, 18 TransportPacketStream, TransportParseStream, ElementaryStream, VideoSegmentStream,
19 Transmuxer, AacStream, H264Stream, NalByteStream, 19 AudioSegmentStream, Transmuxer, AacStream, H264Stream, NalByteStream,
20 MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4; 20 MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE,
21 ADTS_SAMPLING_FREQUENCIES, mp4;
21 22
22 MP2T_PACKET_LENGTH = 188; // bytes 23 MP2T_PACKET_LENGTH = 188; // bytes
23 H264_STREAM_TYPE = 0x1b; 24 H264_STREAM_TYPE = 0x1b;
24 ADTS_STREAM_TYPE = 0x0f; 25 ADTS_STREAM_TYPE = 0x0f;
// Sample rates indexed by the 4-bit sampling frequency index that
// AacStream reads out of each ADTS header (index 0 is the first entry).
// Values are presumably in Hz -- confirm against ISO/IEC 14496-3.
26 ADTS_SAMPLING_FREQUENCIES = [
27 96000,
28 88200,
29 64000,
30 48000,
31 44100,
32 32000,
33 24000,
34 22050,
35 16000,
36 12000,
37 11025,
38 8000,
39 7350
40 ];
41
25 mp4 = videojs.mp4; 42 mp4 = videojs.mp4;
26 43
27 /** 44 /**
...@@ -438,6 +455,11 @@ AacStream = function() { ...@@ -438,6 +455,11 @@ AacStream = function() {
438 455
439 // deliver the AAC frame 456 // deliver the AAC frame
440 this.trigger('data', { 457 this.trigger('data', {
458 channelcount: ((buffer[i + 1] & 1) << 3) |
459 ((buffer[i + 2] & 0xc0) >> 6),
460 samplerate: ADTS_SAMPLING_FREQUENCIES[(buffer[i + 1] & 0x3c) >> 2],
461 // assume ISO/IEC 14496-12 AudioSampleEntry default of 16
462 samplesize: 16,
441 data: buffer.subarray(i + 6, i + frameLength - 1) 463 data: buffer.subarray(i + 6, i + frameLength - 1)
442 }); 464 });
443 465
...@@ -457,6 +479,62 @@ AacStream = function() { ...@@ -457,6 +479,62 @@ AacStream = function() {
457 AacStream.prototype = new videojs.Hls.Stream(); 479 AacStream.prototype = new videojs.Hls.Stream();
458 480
/**
 * Constructs a single-track, ISO BMFF media segment from AAC data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} metadata for the audio track. Its `samples`
 * array is replaced every time a segment is generated.
 */
// TODO: share common code with VideoSegmentStream
AudioSegmentStream = function(track) {
  var aacFrames = [], aacFramesLength = 0, sequenceNumber = 0;
  AudioSegmentStream.prototype.init.call(this);

  /**
   * Buffers one AAC frame until end() is called.
   * @param data {object} an AAC data event; `data.data` holds the
   * frame bytes
   */
  this.push = function(data) {
    aacFrames.push(data);
    aacFramesLength += data.data.byteLength;
  };

  /**
   * Packs every buffered frame into a moof + mdat pair and emits it
   * as a single 'data' event. Does nothing if no audio bytes have
   * been buffered.
   */
  this.end = function() {
    var boxes, currentFrame, data, i, mdat, moof, offset = 0;

    // return early if no audio data has been observed
    if (aacFramesLength === 0) {
      return;
    }

    // concatenate the audio data to construct the mdat
    data = new Uint8Array(aacFramesLength);
    track.samples = [];
    // iterate over the buffered frames themselves; the byte total is
    // a number and cannot drive the loop
    for (i = 0; i < aacFrames.length; i++) {
      currentFrame = aacFrames[i];
      track.samples.push({
        size: currentFrame.data.byteLength,
        duration: 1024 // FIXME calculate for realz
      });

      // write each frame directly after the previous one
      data.set(currentFrame.data, offset);
      offset += currentFrame.data.byteLength;
    }

    // release the buffered frames so they are not emitted again
    aacFrames = [];
    aacFramesLength = 0;
    mdat = mp4.mdat(data);

    moof = mp4.moof(sequenceNumber, [track]);
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    this.trigger('data', boxes);
  };
};
AudioSegmentStream.prototype = new videojs.Hls.Stream();
536
537 /**
460 * Accepts a NAL unit byte stream and unpacks the embedded NAL units. 538 * Accepts a NAL unit byte stream and unpacks the embedded NAL units.
461 */ 539 */
462 NalByteStream = function() { 540 NalByteStream = function() {
...@@ -539,7 +617,7 @@ NalByteStream = function() { ...@@ -539,7 +617,7 @@ NalByteStream = function() {
539 617
540 this.end = function() { 618 this.end = function() {
541 // deliver the last buffered NAL unit 619 // deliver the last buffered NAL unit
542 if (buffer.byteLength > 3) { 620 if (buffer && buffer.byteLength > 3) {
543 this.trigger('data', buffer.subarray(syncPoint + 3)); 621 this.trigger('data', buffer.subarray(syncPoint + 3));
544 } 622 }
545 }; 623 };
...@@ -763,12 +841,19 @@ VideoSegmentStream = function(track) { ...@@ -763,12 +841,19 @@ VideoSegmentStream = function(track) {
763 this.end = function() { 841 this.end = function() {
764 var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample; 842 var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample;
765 843
844 // return early if no video data has been observed
845 if (nalUnitsLength === 0) {
846 return;
847 }
848
766 // concatenate the video data and construct the mdat 849 // concatenate the video data and construct the mdat
767 // first, we have to build the index from byte locations to 850 // first, we have to build the index from byte locations to
768 // samples (that is, frames) in the video data 851 // samples (that is, frames) in the video data
769 data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length)); 852 data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length));
770 view = new DataView(data.buffer); 853 view = new DataView(data.buffer);
771 track.samples = []; 854 track.samples = [];
855
856 // see ISO/IEC 14496-12:2012, section 8.6.4.3
772 sample = { 857 sample = {
773 size: 0, 858 size: 0,
774 flags: { 859 flags: {
...@@ -853,11 +938,14 @@ VideoSegmentStream.prototype = new videojs.Hls.Stream(); ...@@ -853,11 +938,14 @@ VideoSegmentStream.prototype = new videojs.Hls.Stream();
853 Transmuxer = function() { 938 Transmuxer = function() {
854 var 939 var
855 self = this, 940 self = this,
856 track, 941 videoTrack,
942 audioTrack,
857 config, 943 config,
858 pps, 944 pps,
859 945
860 packetStream, parseStream, elementaryStream, aacStream, h264Stream, videoSegmentStream; 946 packetStream, parseStream, elementaryStream,
947 aacStream, h264Stream,
948 videoSegmentStream, audioSegmentStream;
861 949
862 Transmuxer.prototype.init.call(this); 950 Transmuxer.prototype.init.call(this);
863 951
...@@ -880,51 +968,78 @@ Transmuxer = function() { ...@@ -880,51 +968,78 @@ Transmuxer = function() {
880 !config) { 968 !config) {
881 config = data.config; 969 config = data.config;
882 970
883 track.width = config.width; 971 videoTrack.width = config.width;
884 track.height = config.height; 972 videoTrack.height = config.height;
885 track.sps = [data.data]; 973 videoTrack.sps = [data.data];
886 track.profileIdc = config.profileIdc; 974 videoTrack.profileIdc = config.profileIdc;
887 track.levelIdc = config.levelIdc; 975 videoTrack.levelIdc = config.levelIdc;
888 track.profileCompatibility = config.profileCompatibility; 976 videoTrack.profileCompatibility = config.profileCompatibility;
889 977
890 // generate an init segment once all the metadata is available 978 // generate an init segment once all the metadata is available
891 if (pps) { 979 if (pps) {
892 self.trigger('data', { 980 self.trigger('data', {
893 data: videojs.mp4.initSegment([track]) 981 type: 'video',
982 data: videojs.mp4.initSegment([videoTrack])
894 }); 983 });
895 } 984 }
896 } 985 }
897 if (data.nalUnitType === 'pic_parameter_set_rbsp' && 986 if (data.nalUnitType === 'pic_parameter_set_rbsp' &&
898 !pps) { 987 !pps) {
899 pps = data.data; 988 pps = data.data;
900 track.pps = [data.data]; 989 videoTrack.pps = [data.data];
901 990
902 if (config) { 991 if (config) {
903 self.trigger('data', { 992 self.trigger('data', {
904 data: videojs.mp4.initSegment([track]) 993 type: 'video',
994 data: videojs.mp4.initSegment([videoTrack])
905 }); 995 });
906 } 996 }
907 } 997 }
908 }); 998 });
909 // hook up the video segment stream once track metadata is delivered 999 // generate an init segment based on the first audio sample
1000 aacStream.on('data', function(data) {
1001 if (audioTrack && audioTrack.channelcount === undefined) {
1002 audioTrack.channelcount = data.channelcount;
1003 audioTrack.samplerate = data.samplerate;
1004 audioTrack.samplesize = data.samplesize;
1005 self.trigger('data', {
1006 type: 'audio',
1007 data: videojs.mp4.initSegment([audioTrack])
1008 });
1009 }
1010 });
1011 // hook up the segment streams once track metadata is delivered
910 elementaryStream.on('data', function(data) { 1012 elementaryStream.on('data', function(data) {
911 var i, triggerData = function(segment) { 1013 var i, triggerData = function(type) {
1014 return function(segment) {
912 self.trigger('data', { 1015 self.trigger('data', {
1016 type: type,
913 data: segment 1017 data: segment
914 }); 1018 });
915 }; 1019 };
1020 };
916 if (data.type === 'metadata') { 1021 if (data.type === 'metadata') {
917 i = data.tracks.length; 1022 i = data.tracks.length;
1023
1024 // scan the tracks listed in the metadata
918 while (i--) { 1025 while (i--) {
919 if (data.tracks[i].type === 'video') { 1026
920 track = data.tracks[i]; 1027 // hook up the video segment stream to the first track with h264 data
921 if (!videoSegmentStream) { 1028 if (data.tracks[i].type === 'video' && !videoSegmentStream) {
922 videoSegmentStream = new VideoSegmentStream(track); 1029 videoTrack = data.tracks[i];
1030 videoSegmentStream = new VideoSegmentStream(videoTrack);
923 h264Stream.pipe(videoSegmentStream); 1031 h264Stream.pipe(videoSegmentStream);
924 videoSegmentStream.on('data', triggerData); 1032 videoSegmentStream.on('data', triggerData('video'));
925 }
926 break; 1033 break;
927 } 1034 }
1035
1036 // hook up the audio segment stream to the first track with aac data
1037 if (data.tracks[i].type === 'audio' && !audioSegmentStream) {
1038 audioTrack = data.tracks[i];
1039 audioSegmentStream = new AudioSegmentStream(audioTrack);
1040 aacStream.pipe(audioSegmentStream);
1041 audioSegmentStream.on('data', triggerData('audio'));
1042 }
928 } 1043 }
929 } 1044 }
930 }); 1045 });
...@@ -938,6 +1053,7 @@ Transmuxer = function() { ...@@ -938,6 +1053,7 @@ Transmuxer = function() {
938 elementaryStream.end(); 1053 elementaryStream.end();
939 h264Stream.end(); 1054 h264Stream.end();
940 videoSegmentStream.end(); 1055 videoSegmentStream.end();
1056 audioSegmentStream.end();
941 }; 1057 };
942 }; 1058 };
943 Transmuxer.prototype = new videojs.Hls.Stream(); 1059 Transmuxer.prototype = new videojs.Hls.Stream();
......
...@@ -22,7 +22,11 @@ ...@@ -22,7 +22,11 @@
22 */ 22 */
23 var 23 var
24 mp4 = videojs.mp4, 24 mp4 = videojs.mp4,
25 inspectMp4 = videojs.inspectMp4; 25 inspectMp4 = videojs.inspectMp4,
26 validateMvhd, validateTrak, validateTkhd, validateMdia,
27 validateMdhd, validateHdlr, validateMinf, validateDinf,
28 validateStbl, validateStsd, validateMvex,
29 validateVideoSample, validateAudioSample;
26 30
27 module('MP4 Generator'); 31 module('MP4 Generator');
28 32
...@@ -39,62 +43,67 @@ test('generates a BSMFF ftyp', function() { ...@@ -39,62 +43,67 @@ test('generates a BSMFF ftyp', function() {
39 equal(boxes[0].minorVersion, 1, 'minor version is one'); 43 equal(boxes[0].minorVersion, 1, 'minor version is one');
40 }); 44 });
41 45
42 test('generates a moov', function() { 46 validateMvhd = function(mvhd) {
43 var boxes, mvhd, tkhd, mdhd, hdlr, minf, mvex,
44 data = mp4.moov([{
45 id: 7,
46 duration: 100,
47 width: 600,
48 height: 300,
49 type: 'video',
50 profileIdc: 3,
51 levelIdc: 5,
52 profileCompatibility: 7,
53 sps: [new Uint8Array([0, 1, 2]), new Uint8Array([3, 4, 5])],
54 pps: [new Uint8Array([6, 7, 8])]
55 }]);
56
57 ok(data, 'box is not null');
58
59 boxes = inspectMp4(data);
60 equal(boxes.length, 1, 'generated a single box');
61 equal(boxes[0].type, 'moov', 'generated a moov type');
62 equal(boxes[0].size, data.byteLength, 'generated size');
63 equal(boxes[0].boxes.length, 3, 'generated three sub boxes');
64
65 mvhd = boxes[0].boxes[0];
66 equal(mvhd.type, 'mvhd', 'generated a mvhd'); 47 equal(mvhd.type, 'mvhd', 'generated a mvhd');
67 equal(mvhd.duration, 0xffffffff, 'wrote the maximum movie header duration'); 48 equal(mvhd.duration, 0xffffffff, 'wrote the maximum movie header duration');
68 equal(mvhd.nextTrackId, 0xffffffff, 'wrote the max next track id'); 49 equal(mvhd.nextTrackId, 0xffffffff, 'wrote the max next track id');
50 };
51
52 validateTrak = function(trak, expected) {
53 expected = expected || {};
54 equal(trak.type, 'trak', 'generated a trak');
55 equal(trak.boxes.length, 2, 'generated two track sub boxes');
69 56
70 equal(boxes[0].boxes[1].type, 'trak', 'generated a trak'); 57 validateTkhd(trak.boxes[0], expected);
71 equal(boxes[0].boxes[1].boxes.length, 2, 'generated two track sub boxes'); 58 validateMdia(trak.boxes[1], expected);
72 tkhd = boxes[0].boxes[1].boxes[0]; 59 };
60
61 validateTkhd = function(tkhd, expected) {
73 equal(tkhd.type, 'tkhd', 'generated a tkhd'); 62 equal(tkhd.type, 'tkhd', 'generated a tkhd');
74 equal(tkhd.trackId, 7, 'wrote the track id'); 63 equal(tkhd.trackId, 7, 'wrote the track id');
75 deepEqual(tkhd.flags, new Uint8Array([0, 0, 7]), 'flags should equal 7'); 64 deepEqual(tkhd.flags, new Uint8Array([0, 0, 7]), 'flags should equal 7');
76 equal(tkhd.duration, 100, 'wrote duration into the track header'); 65 equal(tkhd.duration,
77 equal(tkhd.width, 600, 'wrote width into the track header'); 66 expected.duration || Math.pow(2, 32) - 1,
78 equal(tkhd.height, 300, 'wrote height into the track header'); 67 'wrote duration into the track header');
79 68 equal(tkhd.width, expected.width || 0, 'wrote width into the track header');
80 equal(boxes[0].boxes[1].boxes[1].type, 'mdia', 'generated an mdia type'); 69 equal(tkhd.height, expected.height || 0, 'wrote height into the track header');
81 equal(boxes[0].boxes[1].boxes[1].boxes.length, 3, 'generated three track media sub boxes'); 70 equal(tkhd.volume, 1, 'set volume to 1');
82 71 };
83 mdhd = boxes[0].boxes[1].boxes[1].boxes[0]; 72
/**
 * Asserts that a parsed box is an mdia with three children, then
 * checks each child (mdhd, hdlr, minf, in that order) against the
 * expected track properties.
 */
validateMdia = function(mdia, expected) {
  var childValidators = [validateMdhd, validateHdlr, validateMinf];

  equal(mdia.type, 'mdia', 'generated an mdia type');
  equal(mdia.boxes.length, 3, 'generated three track media sub boxes');

  // the validator at each position checks the child box at that position
  childValidators.forEach(function(validateChild, position) {
    validateChild(mdia.boxes[position], expected);
  });
};
81
82 validateMdhd = function(mdhd, expected) {
84 equal(mdhd.type, 'mdhd', 'generate an mdhd type'); 83 equal(mdhd.type, 'mdhd', 'generate an mdhd type');
85 equal(mdhd.language, 'und', 'wrote undetermined language'); 84 equal(mdhd.language, 'und', 'wrote undetermined language');
86 equal(mdhd.duration, 100, 'wrote duration into the media header'); 85 equal(mdhd.timescale, expected.timescale || 90000, 'wrote the timescale');
86 equal(mdhd.duration,
87 expected.duration || Math.pow(2, 32) - 1,
88 'wrote duration into the media header');
89 };
87 90
88 hdlr = boxes[0].boxes[1].boxes[1].boxes[1]; 91 validateHdlr = function(hdlr, expected) {
89 equal(hdlr.type, 'hdlr', 'generate an hdlr type'); 92 equal(hdlr.type, 'hdlr', 'generate an hdlr type');
93 if (expected.type !== 'audio') {
90 equal(hdlr.handlerType, 'vide', 'wrote a video handler'); 94 equal(hdlr.handlerType, 'vide', 'wrote a video handler');
91 equal(hdlr.name, 'VideoHandler', 'wrote the handler name'); 95 equal(hdlr.name, 'VideoHandler', 'wrote the handler name');
96 } else {
97 equal(hdlr.handlerType, 'soun', 'wrote a sound handler');
98 equal(hdlr.name, 'SoundHandler', 'wrote the sound handler name');
99 }
100 };
92 101
93 minf = boxes[0].boxes[1].boxes[1].boxes[2]; 102 validateMinf = function(minf, expected) {
94 equal(minf.type, 'minf', 'generate an minf type'); 103 equal(minf.type, 'minf', 'generate an minf type');
95 equal(minf.boxes.length, 3, 'generates three minf sub boxes'); 104 equal(minf.boxes.length, 3, 'generates three minf sub boxes');
96 105
97 equal(minf.boxes[0].type, 'vmhd', 'generates a vmhd type'); 106 if (expected.type !== 'audio') {
98 deepEqual({ 107 deepEqual({
99 type: 'vmhd', 108 type: 'vmhd',
100 size: 20, 109 size: 20,
...@@ -103,8 +112,21 @@ test('generates a moov', function() { ...@@ -103,8 +112,21 @@ test('generates a moov', function() {
103 graphicsmode: 0, 112 graphicsmode: 0,
104 opcolor: new Uint16Array([0, 0, 0]) 113 opcolor: new Uint16Array([0, 0, 0])
105 }, minf.boxes[0], 'generates a vhmd'); 114 }, minf.boxes[0], 'generates a vhmd');
115 } else {
116 deepEqual({
117 type: 'smhd',
118 size: 16,
119 version: 0,
120 flags: new Uint8Array([0, 0, 0]),
121 balance: 0
122 }, minf.boxes[0], 'generates an smhd');
123 }
124
125 validateDinf(minf.boxes[1]);
126 validateStbl(minf.boxes[2], expected);
127 };
106 128
107 equal(minf.boxes[1].type, 'dinf', 'generates a dinf type'); 129 validateDinf = function(dinf) {
108 deepEqual({ 130 deepEqual({
109 type: 'dinf', 131 type: 'dinf',
110 size: 36, 132 size: 36,
...@@ -120,18 +142,57 @@ test('generates a moov', function() { ...@@ -120,18 +142,57 @@ test('generates a moov', function() {
120 flags: new Uint8Array([0, 0, 1]) 142 flags: new Uint8Array([0, 0, 1])
121 }] 143 }]
122 }] 144 }]
123 }, minf.boxes[1], 'generates a dinf'); 145 }, dinf, 'generates a dinf');
146 };
124 147
125 equal(minf.boxes[2].type, 'stbl', 'generates an stbl type'); 148 validateStbl = function(stbl, expected) {
149 equal(stbl.type, 'stbl', 'generates an stbl type');
150 equal(stbl.boxes.length, 5, 'generated five stbl child boxes');
151
152 validateStsd(stbl.boxes[0], expected);
126 deepEqual({ 153 deepEqual({
127 type: 'stbl', 154 type: 'stts',
128 size: 228, 155 size: 16,
129 boxes: [{ 156 version: 0,
130 type: 'stsd', 157 flags: new Uint8Array([0, 0, 0]),
131 size: 152, 158 timeToSamples: []
159 }, stbl.boxes[1], 'generated an stts');
160 deepEqual({
161 type: 'stsc',
162 size: 16,
163 version: 0,
164 flags: new Uint8Array([0, 0, 0]),
165 sampleToChunks: []
166 }, stbl.boxes[2], 'generated an stsc');
167 deepEqual({
168 type: 'stsz',
169 version: 0,
170 size: 20,
171 flags: new Uint8Array([0, 0, 0]),
172 sampleSize: 0,
173 entries: []
174 }, stbl.boxes[3], 'generated an stsz');
175 deepEqual({
176 type: 'stco',
177 size: 16,
132 version: 0, 178 version: 0,
133 flags: new Uint8Array([0, 0, 0]), 179 flags: new Uint8Array([0, 0, 0]),
134 sampleDescriptions: [{ 180 chunkOffsets: []
181 }, stbl.boxes[4], 'generated and stco');
182 };
183
/**
 * Asserts that a parsed box is an stsd holding exactly one sample
 * description, then checks that description as an audio sample when
 * expected.type is 'audio' and as a video sample otherwise.
 */
validateStsd = function(stsd, expected) {
  var validateSample;

  equal(stsd.type, 'stsd', 'generated an stsd');
  equal(stsd.sampleDescriptions.length, 1, 'generated one sample');

  validateSample = expected.type === 'audio' ?
    validateAudioSample :
    validateVideoSample;
  validateSample(stsd.sampleDescriptions[0]);
};
193
194 validateVideoSample = function(sample) {
195 deepEqual(sample, {
135 type: 'avc1', 196 type: 'avc1',
136 size: 136, 197 size: 136,
137 dataReferenceIndex: 1, 198 dataReferenceIndex: 1,
...@@ -164,38 +225,40 @@ test('generates a moov', function() { ...@@ -164,38 +225,40 @@ test('generates a moov', function() {
164 maxBitrate: 3000000, 225 maxBitrate: 3000000,
165 avgBitrate: 3000000 226 avgBitrate: 3000000
166 }] 227 }]
167 }] 228 }, 'generated a video sample');
168 }, { 229 };
169 type: 'stts', 230
/**
 * Asserts that a parsed stsd entry is the mp4a sample entry expected
 * for a two channel, 16-bit, 48000Hz audio track, including its nested
 * esds stream descriptor.
 * @param sample {object} - the parsed sample description to check
 */
validateAudioSample = function(sample) {
  var
    // these values were hard-coded based on a working audio init segment
    decoderConfig = {
      objectProfileIndication: 64,
      streamType: 5,
      bufferSize: 1536,
      maxBitrate: 56000,
      avgBitrate: 56000
    },
    streamDescriptor = {
      type: 'esds',
      size: 39,
      version: 0,
      flags: new Uint8Array([0, 0, 0]),
      esId: 0,
      streamPriority: 0,
      decoderConfig: decoderConfig
    };

  deepEqual(sample, {
    type: 'mp4a',
    size: 75,
    dataReferenceIndex: 1,
    channelcount: 2,
    samplesize: 16,
    samplerate: 48000,
    streamDescriptor: streamDescriptor
  }, 'generated an audio sample');
};
257
258 validateMvex = function(mvex, options) {
259 options = options || {
260 sampleDegradationPriority: 1
261 };
199 deepEqual({ 262 deepEqual({
200 type: 'mvex', 263 type: 'mvex',
201 size: 40, 264 size: 40,
...@@ -213,17 +276,75 @@ test('generates a moov', function() { ...@@ -213,17 +276,75 @@ test('generates a moov', function() {
213 sampleHasRedundancy: 0, 276 sampleHasRedundancy: 0,
214 samplePaddingValue: 0, 277 samplePaddingValue: 0,
215 sampleIsDifferenceSample: true, 278 sampleIsDifferenceSample: true,
216 sampleDegradationPriority: 1 279 sampleDegradationPriority: options.sampleDegradationPriority
217 }] 280 }]
218 }, mvex, 'writes a movie extends box'); 281 }, mvex, 'writes a movie extends box');
282 };
283
// a single video track should produce one moov box holding an mvhd,
// a trak and an mvex, in that order
test('generates a video moov', function() {
  var
    track = {
      id: 7,
      duration: 100,
      width: 600,
      height: 300,
      type: 'video',
      profileIdc: 3,
      levelIdc: 5,
      profileCompatibility: 7,
      sps: [new Uint8Array([0, 1, 2]), new Uint8Array([3, 4, 5])],
      pps: [new Uint8Array([6, 7, 8])]
    },
    data = mp4.moov([track]),
    boxes,
    moov;

  ok(data, 'box is not null');
  boxes = inspectMp4(data);
  equal(boxes.length, 1, 'generated a single box');

  moov = boxes[0];
  equal(moov.type, 'moov', 'generated a moov type');
  equal(moov.size, data.byteLength, 'generated size');
  equal(moov.boxes.length, 3, 'generated three sub boxes');

  validateMvhd(moov.boxes[0]);
  validateTrak(moov.boxes[1], {
    duration: 100,
    width: 600,
    height: 300
  });
  validateMvex(moov.boxes[2]);
});
315
// a single audio track should produce one moov box holding an mvhd,
// a trak and an mvex, in that order
test('generates an audio moov', function() {
  var
    track = {
      id: 7,
      type: 'audio',
      channelcount: 2,
      samplerate: 48000,
      samplesize: 16
    },
    data = mp4.moov([track]),
    boxes,
    moov;

  ok(data, 'box is not null');
  boxes = inspectMp4(data);
  equal(boxes.length, 1, 'generated a single box');

  moov = boxes[0];
  equal(moov.type, 'moov', 'generated a moov type');
  equal(moov.size, data.byteLength, 'generated size');
  equal(moov.boxes.length, 3, 'generated three sub boxes');

  validateMvhd(moov.boxes[0]);
  validateTrak(moov.boxes[1], {
    type: 'audio',
    timescale: 48000
  });
  validateMvex(moov.boxes[2], {
    sampleDegradationPriority: 0
  });
});
220 343
221 test('generates a sound hdlr', function() { 344 test('generates a sound hdlr', function() {
222 var boxes, hdlr, 345 var boxes, hdlr,
223 data = mp4.moov([{ 346 data = mp4.moov([{
224 duration:100, 347 duration:100,
225 width: 600,
226 height: 300,
227 type: 'audio' 348 type: 'audio'
228 }]); 349 }]);
229 350
......
...@@ -586,6 +586,75 @@ test('can parse a video stsd', function() { ...@@ -586,6 +586,75 @@ test('can parse a video stsd', function() {
586 }]); 586 }]);
587 }); 587 });
588 588
// an stsd holding one mp4a sample entry, which in turn wraps an esds
// box, should be parsed into the audio properties plus a nested stream
// descriptor. The descriptor lengths in the fixture describe the bytes
// that actually follow them so the fixture is a well-formed ESDBox.
test('can parse an audio stsd', function() {
  var data = box('stsd',
                 0x00, // version 0
                 0x00, 0x00, 0x00, // flags
                 0x00, 0x00, 0x00, 0x01, // entry_count
                 box('mp4a',
                     0x00, 0x00, 0x00,
                     0x00, 0x00, 0x00, // reserved
                     0x00, 0x01, // data_reference_index
                     0x00, 0x00, 0x00, 0x00,
                     0x00, 0x00, 0x00, 0x00, // reserved
                     0x00, 0x02, // channelcount
                     0x00, 0x10, // samplesize
                     0x00, 0x00, // pre_defined
                     0x00, 0x00, // reserved
                     0xbb, 0x80, 0x00, 0x00, // samplerate, fixed-point 16.16
                     box('esds',
                         0x00, // version 0
                         0x00, 0x00, 0x00, // flags

                         // ES_Descriptor
                         0x03, // tag, ES_DescrTag
                         0x19, // length, everything through the SLConfigDescriptor
                         0x00, 0x01, // ES_ID
                         0x00, // streamDependenceFlag, URL_Flag, reserved, streamPriority

                         // DecoderConfigDescriptor
                         0x04, // tag, DecoderConfigDescrTag
                         0x11, // length, includes the nested DecoderSpecificInfo
                         0x40, // objectProfileIndication, Audio ISO/IEC 14496-3
                         0x15, // streamType, AudioStream. upstream, reserved
                         0x00, 0x00, 0xff, // bufferSizeDB
                         0x00, 0x00, 0x00, 0xff, // maxBitrate
                         0x00, 0x00, 0x00, 0xaa, // avgBitrate

                         // DecoderSpecificInfo
                         0x05, // tag, DecoderSpecificInfoTag
                         0x02, // length
                         0x11, 0x90, // AudioSpecificConfig: AAC LC, 48kHz, 2 channels

                         // SLConfigDescriptor
                         0x06, // tag, SLConfigDescrTag
                         0x01, // length
                         0x02))); // predefined

  deepEqual(videojs.inspectMp4(new Uint8Array(data)), [{
    version: 0,
    flags: new Uint8Array([0, 0, 0]),
    type: 'stsd',
    size: 91,
    sampleDescriptions: [{
      type: 'mp4a',
      dataReferenceIndex: 1,
      channelcount: 2,
      samplesize: 16,
      samplerate: 48000,
      size: 75,
      streamDescriptor: {
        type: 'esds',
        version: 0,
        size: 39,
        flags: new Uint8Array([0, 0, 0]),
        esId: 1,
        streamPriority: 0,
        decoderConfig: {
          objectProfileIndication: 0x40,
          streamType: 0x05,
          bufferSize: 0xff,
          maxBitrate: 0xff,
          avgBitrate: 0xaa
        }
      }
    }]
  }], 'parsed an audio stsd');
});
657
589 test('can parse an styp', function() { 658 test('can parse an styp', function() {
590 deepEqual(videojs.inspectMp4(new Uint8Array(box('styp', 659 deepEqual(videojs.inspectMp4(new Uint8Array(box('styp',
591 0x61, 0x76, 0x63, 0x31, // major brand 660 0x61, 0x76, 0x63, 0x31, // major brand
...@@ -845,6 +914,24 @@ test('can parse a sidx', function(){ ...@@ -845,6 +914,24 @@ test('can parse a sidx', function(){
845 }]); 914 }]);
846 }); 915 });
847 916
// the sound media header carries a version, flags and a fixed-point
// 8.8 balance value
test('can parse an smhd', function() {
  var
    data = box('smhd',
               0x00, // version
               0x00, 0x00, 0x00, // flags
               0x00, 0xff, // balance, fixed-point 8.8
               0x00, 0x00), // reserved
    parsed = videojs.inspectMp4(new Uint8Array(data)),
    expected = [{
      type: 'smhd',
      size: 16,
      version: 0,
      flags: new Uint8Array([0, 0, 0]),
      // 8 fractional bits, so the raw value is scaled down by 2^8
      balance: 0xff / 256
    }];

  deepEqual(parsed, expected, 'parsed an smhd');
});
934
848 test('can parse a tfdt', function() { 935 test('can parse a tfdt', function() {
849 var data = box('tfdt', 936 var data = box('tfdt',
850 0x00, // version 937 0x00, // version
......
...@@ -129,6 +129,27 @@ var ...@@ -129,6 +129,27 @@ var
129 avgBitrate: view.getUint32(8) 129 avgBitrate: view.getUint32(8)
130 }; 130 };
131 }, 131 },
132 esds: function(data) {
133 return {
134 version: data[0],
135 flags: new Uint8Array(data.subarray(1, 4)),
136 esId: (data[6] << 8) | data[7],
137 streamPriority: data[8] & 0x1f,
138 decoderConfig: {
139 objectProfileIndication: data[11],
140 streamType: (data[12] >>> 2) & 0x3f,
141 bufferSize: (data[13] << 16) | (data[14] << 8) | data[15],
142 maxBitrate: (data[16] << 24) |
143 (data[17] << 16) |
144 (data[18] << 8) |
145 data[19],
146 avgBitrate: (data[20] << 24) |
147 (data[21] << 16) |
148 (data[22] << 8) |
149 data[23]
150 }
151 };
152 },
132 ftyp: function(data) { 153 ftyp: function(data) {
133 var 154 var
134 view = new DataView(data.buffer, data.byteOffset, data.byteLength), 155 view = new DataView(data.buffer, data.byteOffset, data.byteLength),
...@@ -247,6 +268,30 @@ var ...@@ -247,6 +268,30 @@ var
247 boxes: videojs.inspectMp4(data) 268 boxes: videojs.inspectMp4(data)
248 }; 269 };
249 }, 270 },
271 // codingname, not a first-class box type. stsd entries share the
272 // same format as real boxes so the parsing infrastructure can be
273 // shared
274 mp4a: function(data) {
275 var
276 view = new DataView(data.buffer, data.byteOffset, data.byteLength),
277 result = {
278 // 6 bytes reserved
279 dataReferenceIndex: view.getUint16(6),
280 // 4 + 4 bytes reserved
281 channelcount: view.getUint16(16),
282 samplesize: view.getUint16(18),
283 // 2 bytes pre_defined
284 // 2 bytes reserved
285 samplerate: view.getUint16(24) + (view.getUint16(26) / 65536)
286 };
287
288 // if there are more bytes to process, assume this is an ISO/IEC
289 // 14496-14 MP4AudioSampleEntry and parse the ESDBox
290 if (data.byteLength > 28) {
291 result.streamDescriptor = videojs.inspectMp4(data.subarray(28))[0];
292 }
293 return result;
294 },
250 moof: function(data) { 295 moof: function(data) {
251 return { 296 return {
252 boxes: videojs.inspectMp4(data) 297 boxes: videojs.inspectMp4(data)
...@@ -357,6 +402,13 @@ var ...@@ -357,6 +402,13 @@ var
357 402
358 return result; 403 return result;
359 }, 404 },
405 smhd: function(data) {
406 return {
407 version: data[0],
408 flags: new Uint8Array(data.subarray(1, 4)),
409 balance: data[4] + (data[5] / 256)
410 };
411 },
360 stbl: function(data) { 412 stbl: function(data) {
361 return { 413 return {
362 boxes: videojs.inspectMp4(data) 414 boxes: videojs.inspectMp4(data)
......
...@@ -181,8 +181,8 @@ ...@@ -181,8 +181,8 @@
181 181
182 mediaSource.addEventListener('sourceopen', function() { 182 mediaSource.addEventListener('sourceopen', function() {
183 var 183 var
184 buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'), 184 // buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d');
185 one = false; 185 buffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2');
186 buffer.addEventListener('updatestart', logevent); 186 buffer.addEventListener('updatestart', logevent);
187 buffer.addEventListener('updateend', logevent); 187 buffer.addEventListener('updateend', logevent);
188 buffer.addEventListener('error', logevent); 188 buffer.addEventListener('error', logevent);
...@@ -211,27 +211,43 @@ ...@@ -211,27 +211,43 @@
211 var segment = new Uint8Array(reader.result), 211 var segment = new Uint8Array(reader.result),
212 transmuxer = new videojs.mp2t.Transmuxer(), 212 transmuxer = new videojs.mp2t.Transmuxer(),
213 events = [], 213 events = [],
214 i = 0,
215 bytesLength = 0,
216 init = false,
214 bytes, 217 bytes,
215 hex = ''; 218 hex = '';
216 219
217 transmuxer.on('data', function(data) { 220 transmuxer.on('data', function(data) {
218 if (data) { 221 if (data && data.type === 'audio') {
219 events.push(data.data); 222 events.push(data.data);
223 bytesLength += data.data.byteLength;
224
225 // XXX Media Sources Testing
226 if (!init) {
227 vjsParsed = videojs.inspectMp4(data.data);
228 console.log('appended tmuxed output');
229 window.vjsSourceBuffer.appendBuffer(data.data);
230 init = true;
231 }
220 } 232 }
221 }); 233 });
222 transmuxer.push(segment); 234 transmuxer.push(segment);
223 transmuxer.end(); 235 transmuxer.end();
224 236
225 bytes = new Uint8Array(events[0].byteLength + events[1].byteLength); 237 bytes = new Uint8Array(bytesLength);
226 bytes.set(events[0]); 238 i = 0;
227 bytes.set(events[1], events[0].byteLength); 239 while (events.length) {
240 bytes.set(events[0], i);
241 i += events[0].byteLength;
242 events.shift();
243 }
228 244
229 vjsParsed = videojs.inspectMp4(bytes); 245 // vjsParsed = videojs.inspectMp4(bytes);
230 console.log('transmuxed', vjsParsed); 246 console.log('transmuxed', videojs.inspectMp4(bytes));
231 diffParsed(); 247 diffParsed();
232 248
233 // clear old box info 249 // clear old box info
234 vjsBoxes.innerHTML = stringify(vjsParsed, null, ' '); 250 vjsBoxes.innerHTML = stringify(videojs.inspectMp4(bytes), null, ' ');
235 251
236 // write out the result 252 // write out the result
237 hex += '<pre>'; 253 hex += '<pre>';
...@@ -263,8 +279,7 @@ ...@@ -263,8 +279,7 @@
263 workingOutput.innerHTML = hex; 279 workingOutput.innerHTML = hex;
264 280
265 // XXX Media Sources Testing 281 // XXX Media Sources Testing
266 window.vjsSourceBuffer.appendBuffer(bytes); 282 // window.vjsSourceBuffer.appendBuffer(bytes);
267 console.log('appended bytes');
268 }); 283 });
269 reader.readAsArrayBuffer(this.files[0]); 284 reader.readAsArrayBuffer(this.files[0]);
270 }, false); 285 }, false);
......
...@@ -76,27 +76,41 @@ ...@@ -76,27 +76,41 @@
76 76
77 // setup the media source 77 // setup the media source
// Once the media source opens, transmux the example MPEG-TS data and
// feed the resulting video and audio segments to their own source
// buffers, one segment per update.
mediaSource.addEventListener('sourceopen', function() {
  var videoBuffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
      audioBuffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2'),
      transmuxer = new videojs.mp2t.Transmuxer(),
      videoSegments = [],
      audioSegments = [],
      appendQueued;

  // expose the machinery for debugging
  window.vjsMediaSource = mediaSource;
  window.vjsSourceBuffer = videoBuffer;
  window.vjsVideo = demo;

  // transmux the MPEG-TS data to BMFF segments
  transmuxer.on('data', function(segment) {
    if (segment.type === 'video') {
      videoSegments.push(segment);
    } else {
      audioSegments.push(segment);
    }
  });
  transmuxer.push(hazeVideo);
  transmuxer.end();

  // Appends the queued segments to a source buffer one at a time,
  // waiting for each append to finish before starting the next. A
  // queue that is empty from the start (a stream without that track)
  // is left alone instead of throwing.
  appendQueued = function(buffer, segments) {
    var appendNext = function() {
      if (segments.length) {
        buffer.appendBuffer(segments.shift().data);
      }
    };
    buffer.addEventListener('updateend', appendNext);
    appendNext();
  };

  // buffer up the video data
  appendQueued(videoBuffer, videoSegments);

  // buffer up the audio data
  appendQueued(audioBuffer, audioSegments);
});
......
...@@ -94,8 +94,10 @@ ...@@ -94,8 +94,10 @@
// One-shot 'sourceopen' handler: creates the video source buffer and
// appends the prepared bytes to it, then creates an audio source
// buffer that is not yet fed any data.
var onMediaSourceOpen = function() {
  var videoBuffer, audioBuffer;

  console.log('on media open');
  // only run once
  ms.removeEventListener('sourceopen', onMediaSourceOpen);

  videoBuffer = ms.addSourceBuffer('video/mp4;codecs="avc1.4D400D"');
  videoBuffer.appendBuffer(bytes);

  audioBuffer = ms.addSourceBuffer('audio/mp4;codecs=mp4a.40.2');
};
100 102
101 ms.addEventListener('sourceopen', onMediaSourceOpen); 103 ms.addEventListener('sourceopen', onMediaSourceOpen);
......
...@@ -47,7 +47,9 @@ var ...@@ -47,7 +47,9 @@ var
47 validateTrack, 47 validateTrack,
48 validateTrackFragment, 48 validateTrackFragment,
49 49
50 videoPes; 50 transportPacket,
51 videoPes,
52 audioPes;
51 53
52 module('MP2T Packet Stream', { 54 module('MP2T Packet Stream', {
53 setup: function() { 55 setup: function() {
...@@ -397,15 +399,22 @@ test('parses an elementary stream packet with a pts and dts', function() { ...@@ -397,15 +399,22 @@ test('parses an elementary stream packet with a pts and dts', function() {
397 equal(2 / 90, packet.dts, 'parsed the dts'); 399 equal(2 / 90, packet.dts, 'parsed the dts');
398 }); 400 });
399 401
400 // helper function to create video PES packets 402 /**
401 videoPes = function(data, first) { 403 * Helper function to create transport stream PES packets
404 * @param pid {uint8} - the program identifier (PID)
405 * @param data {arraylike} - the payload bytes
406 * @param first {boolean} - true if this PES should be a payload
407 * unit start
408 */
409 transportPacket = function(pid, data, first) {
402 var 410 var
403 adaptationFieldLength = 188 - data.length - (first ? 18 : 17), 411 adaptationFieldLength = 188 - data.length - (first ? 15 : 14),
412 // transport_packet(), Rec. ITU-T H.222.0, Table 2-2
404 result = [ 413 result = [
405 // sync byte 414 // sync byte
406 0x47, 415 0x47,
407 // tei:0 pusi:1 tp:0 pid:0 0000 0001 0001 416 // tei:0 pusi:1 tp:0 pid:0 0000 0001 0001
408 0x40, 0x11, 417 0x40, pid,
409 // tsc:01 afc:11 cc:0000 418 // tsc:01 afc:11 cc:0000
410 0x70 419 0x70
411 ].concat([ 420 ].concat([
...@@ -422,6 +431,7 @@ videoPes = function(data, first) { ...@@ -422,6 +431,7 @@ videoPes = function(data, first) {
422 result.push(0xff); 431 result.push(0xff);
423 } 432 }
424 433
434 // PES_packet(), Rec. ITU-T H.222.0, Table 2-21
425 result = result.concat([ 435 result = result.concat([
426 // pscp:0000 0000 0000 0000 0000 0001 436 // pscp:0000 0000 0000 0000 0000 0001
427 0x00, 0x00, 0x01, 437 0x00, 0x00, 0x01,
...@@ -437,14 +447,41 @@ videoPes = function(data, first) { ...@@ -437,14 +447,41 @@ videoPes = function(data, first) {
437 if (first) { 447 if (first) {
438 result.push(0x00); 448 result.push(0x00);
439 } 449 }
440 result = result.concat([ 450 return result.concat(data);
451 };
452
/**
 * Helper function to create video PES packets. The payload is
 * prefixed with a NAL unit start code and wrapped in a transport
 * packet on PID 0x11.
 * @param data {arraylike} - the payload bytes
 * @param first {boolean} - true if this PES should be a payload
 * unit start
 */
videoPes = function(data, first) {
  var nalStartCode = [0x00, 0x00, 0x01];

  return transportPacket(0x11, nalStartCode.concat(data), first);
};
446 standalonePes = videoPes([0xaf, 0x01], true); 465 standalonePes = videoPes([0xaf, 0x01], true);
447 466
/**
 * Helper function to create audio PES packets. The payload is
 * prefixed with a seven byte ADTS header and wrapped in a transport
 * packet on PID 0x12.
 * @param data {arraylike} - the payload bytes
 * @param first {boolean} - true if this PES should be a payload
 * unit start
 */
audioPes = function(data, first) {
  var
    // the frame length written to the header counts its own 7 bytes
    frameLength = data.length + 7,
    adtsHeader = [
      0xff, 0xf1, // no CRC
      0x10, // AAC Main, 44.1KHz
      0xb0 | ((frameLength & 0x1800) >> 11), // 2 channels
      (frameLength & 0x7f8) >> 3,
      ((frameLength & 0x07) << 5) + 7, // frame length in bytes
      0x00 // one AAC per ADTS frame
    ];

  return transportPacket(0x12, adtsHeader.concat(data), first);
};
484
448 test('parses an elementary stream packet without a pts or dts', function() { 485 test('parses an elementary stream packet without a pts or dts', function() {
449 486
450 var packet; 487 var packet;
...@@ -951,8 +988,8 @@ test('generates AAC frame events from ADTS bytes', function() { ...@@ -951,8 +988,8 @@ test('generates AAC frame events from ADTS bytes', function() {
951 type: 'audio', 988 type: 'audio',
952 data: new Uint8Array([ 989 data: new Uint8Array([
953 0xff, 0xf1, // no CRC 990 0xff, 0xf1, // no CRC
954 0x00, // AAC Main, 44.1KHz 991 0x10, // AAC Main, 44.1KHz
955 0xfc, 0x01, 0x20, // frame length 9 bytes 992 0xbc, 0x01, 0x20, // 2 channels, frame length 9 bytes
956 0x00, // one AAC per ADTS frame 993 0x00, // one AAC per ADTS frame
957 0x12, 0x34, // AAC payload 994 0x12, 0x34, // AAC payload
958 0x56, 0x78 // extra junk that should be ignored 995 0x56, 0x78 // extra junk that should be ignored
...@@ -961,6 +998,13 @@ test('generates AAC frame events from ADTS bytes', function() { ...@@ -961,6 +998,13 @@ test('generates AAC frame events from ADTS bytes', function() {
961 998
962 equal(frames.length, 1, 'generated one frame'); 999 equal(frames.length, 1, 'generated one frame');
963 deepEqual(frames[0].data, new Uint8Array([0x12, 0x34]), 'extracted AAC frame'); 1000 deepEqual(frames[0].data, new Uint8Array([0x12, 0x34]), 'extracted AAC frame');
1001 equal(frames[0].channelcount, 2, 'parsed channelcount');
1002 equal(frames[0].samplerate, 44100, 'parsed samplerate');
1003
1004 // Chrome only supports 8, 16, and 32 bit sample sizes. Assuming the
1005 // default value of 16 in ISO/IEC 14496-12 AudioSampleEntry is
1006 // acceptable.
1007 equal(frames[0].samplesize, 16, 'parsed samplesize');
964 }); 1008 });
965 1009
966 // not handled: ADTS with CRC 1010 // not handled: ADTS with CRC
...@@ -972,7 +1016,7 @@ module('Transmuxer', { ...@@ -972,7 +1016,7 @@ module('Transmuxer', {
972 } 1016 }
973 }); 1017 });
974 1018
975 test('generates an init segment', function() { 1019 test('generates a video init segment', function() {
976 var segments = []; 1020 var segments = [];
977 transmuxer.on('data', function(segment) { 1021 transmuxer.on('data', function(segment) {
978 segments.push(segment); 1022 segments.push(segment);
...@@ -980,16 +1024,38 @@ test('generates an init segment', function() { ...@@ -980,16 +1024,38 @@ test('generates an init segment', function() {
980 transmuxer.push(packetize(PAT)); 1024 transmuxer.push(packetize(PAT));
981 transmuxer.push(packetize(PMT)); 1025 transmuxer.push(packetize(PMT));
982 transmuxer.push(packetize(videoPes([ 1026 transmuxer.push(packetize(videoPes([
983 0x07, 1027 0x08, 0x01 // pic_parameter_set_rbsp
1028 ], true)));
1029 transmuxer.push(packetize(videoPes([
1030 0x07, // seq_parameter_set_rbsp
984 0x27, 0x42, 0xe0, 0x0b, 1031 0x27, 0x42, 0xe0, 0x0b,
985 0xa9, 0x18, 0x60, 0x9d, 1032 0xa9, 0x18, 0x60, 0x9d,
986 0x80, 0x53, 0x06, 0x01, 1033 0x80, 0x53, 0x06, 0x01,
987 0x06, 0xb6, 0xc2, 0xb5, 1034 0x06, 0xb6, 0xc2, 0xb5,
988 0xef, 0x7c, 0x04 1035 0xef, 0x7c, 0x04
1036 ], false)));
1037 transmuxer.end();
1038
1039 equal(segments.length, 2, 'generated init and media segments');
1040 ok(segments[0].data, 'wrote data in the init segment');
1041 equal(segments[0].type, 'video', 'video is the segment type');
1042 });
1043
// pushing a PAT, a PMT and a single audio PES through the transmuxer
// should emit an init segment followed by a media segment, both
// tagged as audio
test('generates an audio init segment', function() {
  var
    segments = [],
    packets = [
      PAT,
      PMT,
      audioPes([
        0x00, 0x01
      ], true)
    ];

  transmuxer.on('data', function(segment) {
    segments.push(segment);
  });
  packets.forEach(function(packet) {
    transmuxer.push(packetize(packet));
  });
  transmuxer.end();

  equal(segments.length, 2, 'generated init and media segments');
  ok(segments[0].data, 'wrote data in the init segment');
  equal(segments[0].type, 'audio', 'audio is the segment type');
});
994 1060
995 test('buffers video samples until ended', function() { 1061 test('buffers video samples until ended', function() {
...@@ -1123,20 +1189,26 @@ validateTrackFragment = function(track, segment, metadata) { ...@@ -1123,20 +1189,26 @@ validateTrackFragment = function(track, segment, metadata) {
1123 1189
1124 test('parses an example mp2t file and generates media segments', function() { 1190 test('parses an example mp2t file and generates media segments', function() {
1125 var 1191 var
1126 segments = [], 1192 videoSegments = [],
1193 audioSegments = [],
1127 sequenceNumber = window.Infinity, 1194 sequenceNumber = window.Infinity,
1128 i, boxes, mfhd; 1195 i, boxes, mfhd;
1129 1196
1130 transmuxer.on('data', function(segment) { 1197 transmuxer.on('data', function(segment) {
1131 segments.push(segment); 1198 if (segment.type === 'video') {
1199 videoSegments.push(segment);
1200 } else if (segment.type === 'audio') {
1201 audioSegments.push(segment);
1202 }
1132 }); 1203 });
1133 transmuxer.push(window.bcSegment); 1204 transmuxer.push(window.bcSegment);
1134 transmuxer.end(); 1205 transmuxer.end();
1135 1206
1136 equal(segments.length, 2, 'generated two segments'); 1207 equal(videoSegments.length, 2, 'generated two video segments');
1208 equal(audioSegments.length, 2, 'generated two audio segments');
1137 1209
1138 boxes = videojs.inspectMp4(segments[0].data); 1210 boxes = videojs.inspectMp4(videoSegments[0].data);
1139 equal(boxes.length, 2, 'init segments are composed of two boxes'); 1211 equal(boxes.length, 2, 'video init segments are composed of two boxes');
1140 equal(boxes[0].type, 'ftyp', 'the first box is an ftyp'); 1212 equal(boxes[0].type, 'ftyp', 'the first box is an ftyp');
1141 equal(boxes[1].type, 'moov', 'the second box is a moov'); 1213 equal(boxes[1].type, 'moov', 'the second box is a moov');
1142 equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd'); 1214 equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd');
...@@ -1150,9 +1222,9 @@ test('parses an example mp2t file and generates media segments', function() { ...@@ -1150,9 +1222,9 @@ test('parses an example mp2t file and generates media segments', function() {
1150 // }); 1222 // });
1151 // equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex'); 1223 // equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex');
1152 1224
1153 boxes = videojs.inspectMp4(segments[1].data); 1225 boxes = videojs.inspectMp4(videoSegments[1].data);
1154 ok(boxes.length > 0, 'media segments are not empty'); 1226 ok(boxes.length > 0, 'video media segments are not empty');
1155 ok(boxes.length % 2 === 0, 'media segments are composed of pairs of boxes'); 1227 ok(boxes.length % 2 === 0, 'video media segments are composed of pairs of boxes');
1156 for (i = 0; i < boxes.length; i += 2) { 1228 for (i = 0; i < boxes.length; i += 2) {
1157 equal(boxes[i].type, 'moof', 'first box is a moof'); 1229 equal(boxes[i].type, 'moof', 'first box is a moof');
1158 equal(boxes[i].boxes.length, 2, 'the moof has two children'); 1230 equal(boxes[i].boxes.length, 2, 'the moof has two children');
...@@ -1163,7 +1235,7 @@ test('parses an example mp2t file and generates media segments', function() { ...@@ -1163,7 +1235,7 @@ test('parses an example mp2t file and generates media segments', function() {
1163 sequenceNumber = mfhd.sequenceNumber; 1235 sequenceNumber = mfhd.sequenceNumber;
1164 1236
1165 equal(boxes[i + 1].type, 'mdat', 'second box is an mdat'); 1237 equal(boxes[i + 1].type, 'mdat', 'second box is an mdat');
1166 validateTrackFragment(boxes[i].boxes[1], segments[1].data, { 1238 validateTrackFragment(boxes[i].boxes[1], videoSegments[1].data, {
1167 trackId: 256, 1239 trackId: 256,
1168 width: 388, 1240 width: 388,
1169 height: 300, 1241 height: 300,
......