def510ae by David LaPalomento

Generate a valid audio initialization segment

Modify the mp4 generator to inspect audio tracks and generate a working initialization segment. Hook the audio init segment up to the mp4 transmuxing test page.
1 parent 458da175
......@@ -4,7 +4,7 @@
var box, dinf, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, trak,
tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, styp, traf, trex, trun,
types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR,
AUDIO_HDLR, HDLR_TYPES, VMHD, DREF, STCO, STSC, STSZ, STTS,
AUDIO_HDLR, HDLR_TYPES, ESDS, VMHD, SMHD, DREF, STCO, STSC, STSZ, STTS,
Uint8Array, DataView;
Uint8Array = window.Uint8Array;
......@@ -19,6 +19,7 @@ DataView = window.DataView;
btrt: [],
dinf: [],
dref: [],
esds: [],
ftyp: [],
hdlr: [],
mdat: [],
......@@ -28,9 +29,11 @@ DataView = window.DataView;
minf: [],
moof: [],
moov: [],
mp4a: [], // codingname
mvex: [],
mvhd: [],
sdtp: [],
smhd: [],
stbl: [],
stco: [],
stsc: [],
......@@ -109,6 +112,39 @@ DataView = window.DataView;
0x00, // version 0
0x00, 0x00, 0x01 // entry_flags
]);
// Payload of the 'esds' box that is appended to the mp4a sample entry.
// Every value here is hard-coded; nothing is derived from the track.
ESDS = new Uint8Array([
0x00, // version
0x00, 0x00, 0x00, // flags
// ES_Descriptor
0x03, // tag, ES_DescrTag
0x19, // length, 25 bytes: everything that follows in this array
0x00, 0x00, // ES_ID
0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority
// DecoderConfigDescriptor
0x04, // tag, DecoderConfigDescrTag
0x11, // length, 17 bytes: through the end of DecoderSpecificInfo
0x40, // object type
0x15, // streamType
0x00, 0x06, 0x00, // bufferSizeDB
0x00, 0x00, 0xda, 0xc0, // maxBitrate
0x00, 0x00, 0xda, 0xc0, // avgBitrate
// DecoderSpecificInfo
0x05, // tag, DecoderSpecificInfoTag
0x02, // length
// ISO/IEC 14496-3, AudioSpecificConfig. The two bytes form the bit
// string 00010 0011 0010 000: audioObjectType 2 (AAC LC),
// samplingFrequencyIndex 3 (48000Hz), channelConfiguration 2 (stereo),
// followed by three zero GASpecificConfig flag bits.
0x11,
0x90,
// These three bytes lie outside the DecoderConfigDescriptor length
// above. NOTE(review): they look like an SLConfigDescriptor (tag 0x06,
// length 1, predefined 2) rather than a GASpecificConfig -- confirm.
0x06, 0x01, 0x02
]);
// Payload of the 'smhd' (sound media header) box. It is written into
// the minf of every track whose type is not 'video'.
SMHD = new Uint8Array([
0x00, // version
0x00, 0x00, 0x00, // flags
0x00, 0x00, // balance, 0 means centered
0x00, 0x00 // reserved
]);
STCO = new Uint8Array([
0x00, // version
0x00, 0x00, 0x00, // flags
......@@ -171,24 +207,35 @@ hdlr = function(type) {
mdat = function(data) {
return box(types.mdat, data);
};
mdhd = function(duration) {
return box(types.mdhd, new Uint8Array([
mdhd = function(track) {
var result = new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x02, // creation_time
0x00, 0x00, 0x00, 0x03, // modification_time
0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second
(duration & 0xFF000000) >> 24,
(duration & 0xFF0000) >> 16,
(duration & 0xFF00) >> 8,
duration & 0xFF, // duration
(track.duration >>> 24),
(track.duration >>> 16) & 0xFF,
(track.duration >>> 8) & 0xFF,
track.duration & 0xFF, // duration
0x55, 0xc4, // 'und' language (undetermined)
0x00, 0x00
]));
]);
// Use the sample rate from the track metadata, when it is
// defined. The sample rate can be parsed out of an ADTS header, for
// instance.
if (track.samplerate) {
result[12] = (track.samplerate >>> 24);
result[13] = (track.samplerate >>> 16) & 0xFF;
result[14] = (track.samplerate >>> 8) & 0xFF;
result[15] = (track.samplerate) & 0xFF;
}
return box(types.mdhd, result);
};
mdia = function(track) {
return box(types.mdia, mdhd(track.duration), hdlr(track.type), minf(track));
return box(types.mdia, mdhd(track), hdlr(track.type), minf(track));
};
mfhd = function(sequenceNumber) {
return box(types.mfhd, new Uint8Array([
......@@ -201,7 +248,10 @@ mfhd = function(sequenceNumber) {
]));
};
minf = function(track) {
return box(types.minf, box(types.vmhd, VMHD), dinf(), stbl(track));
return box(types.minf,
track.type === 'video' ? box(types.vmhd, VMHD) : box(types.smhd, SMHD),
dinf(),
stbl(track));
};
moof = function(sequenceNumber, tracks) {
var
......@@ -217,7 +267,9 @@ moof = function(sequenceNumber, tracks) {
].concat(trackFragments));
};
/**
* @param tracks... (optional) {array} the tracks associated with this movie
* Returns a movie box.
* @param tracks {array} the tracks associated with this movie
* @see ISO/IEC 14496-12:2012(E), section 8.2.1
*/
moov = function(tracks) {
var
......@@ -307,12 +359,20 @@ stbl = function(track) {
box(types.stco, STCO));
};
stsd = function(track) {
var sequenceParameterSets = [], pictureParameterSets = [], i;
(function() {
var videoSample, audioSample;
if (track.type === 'audio') {
return box(types.stsd);
}
stsd = function(track) {
return box(types.stsd, new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x01
]), track.type === 'video' ? videoSample(track) : audioSample(track));
};
videoSample = function(track) {
var sequenceParameterSets = [], pictureParameterSets = [], i;
// assemble the SPSs
for (i = 0; i < track.sps.length; i++) {
......@@ -328,11 +388,7 @@ stsd = function(track) {
pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i]));
}
return box(types.stsd, new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x01]),
box(types.avc1, new Uint8Array([
return box(types.avc1, new Uint8Array([
0x00, 0x00, 0x00,
0x00, 0x00, 0x00, // reserved
0x00, 0x01, // data_reference_index
......@@ -359,8 +415,8 @@ stsd = function(track) {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, // compressorname
0x00, 0x18, // depth = 24
0x11, 0x11]), // pre_defined = -1
box(types.avcC, new Uint8Array([
0x11, 0x11 // pre_defined = -1
]), box(types.avcC, new Uint8Array([
0x01, // configurationVersion
track.profileIdc, // AVCProfileIndication
track.profileCompatibility, // profile_compatibility
......@@ -374,16 +430,45 @@ stsd = function(track) {
box(types.btrt, new Uint8Array([
0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB
0x00, 0x2d, 0xc6, 0xc0, // maxBitrate
0x00, 0x2d, 0xc6, 0xc0])) // avgBitrate
));
};
0x00, 0x2d, 0xc6, 0xc0
])) // avgBitrate
);
};
/**
 * Builds the mp4a sample entry for an audio track, followed by the
 * hard-coded esds box.
 * @param track {object} audio track metadata; channelcount, samplesize
 * and samplerate are each written as 16-bit big-endian values
 * @return the serialized mp4a box
 */
audioSample = function(track) {
  var
    channels = track.channelcount,
    bitsPerSample = track.samplesize,
    rate = track.samplerate,
    sampleEntry = new Uint8Array([
      // SampleEntry, ISO/IEC 14496-12
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // reserved
      0x00, 0x01, // data_reference_index

      // AudioSampleEntry, ISO/IEC 14496-12
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, // reserved
      (channels >>> 8) & 0xff, channels & 0xff, // channelcount
      (bitsPerSample >>> 8) & 0xff, bitsPerSample & 0xff, // samplesize
      0x00, 0x00, // pre_defined
      0x00, 0x00, // reserved
      // samplerate, 16.16 fixed point: integer part first, then a zero
      // fractional part
      (rate >>> 8) & 0xff, rate & 0xff,
      0x00, 0x00
    ]);

  // MP4AudioSampleEntry, ISO/IEC 14496-14: the sample entry is followed
  // by an esds box
  return box(types.mp4a, sampleEntry, box(types.esds, ESDS));
};
})();
styp = function() {
return box(types.styp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND);
};
tkhd = function(track) {
return box(types.tkhd, new Uint8Array([
var result = new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x07, // flags
0x00, 0x00, 0x00, 0x00, // creation_time
......@@ -401,7 +486,7 @@ tkhd = function(track) {
0x00, 0x00, 0x00, 0x00, // reserved
0x00, 0x00, // layer
0x00, 0x00, // alternate_group
0x00, 0x00, // non-audio track volume
0x01, 0x00, // non-audio track volume
0x00, 0x00, // reserved
0x00, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
......@@ -418,7 +503,9 @@ tkhd = function(track) {
(track.height & 0xFF00) >> 8,
track.height & 0xFF,
0x00, 0x00 // height
]));
]);
return box(types.tkhd, result);
};
traf = function(track) {
......@@ -461,7 +548,7 @@ trak = function(track) {
};
trex = function(track) {
return box(types.trex, new Uint8Array([
var result = new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
(track.id & 0xFF000000) >> 24,
......@@ -472,7 +559,16 @@ trex = function(track) {
0x00, 0x00, 0x00, 0x00, // default_sample_duration
0x00, 0x00, 0x00, 0x00, // default_sample_size
0x00, 0x01, 0x00, 0x01 // default_sample_flags
]));
]);
// the last two bytes of default_sample_flags is the sample
// degradation priority, a hint about the importance of this sample
// relative to others. Lower the degradation priority for all sample
// types other than video.
if (track.type !== 'video') {
result[result.length - 1] = 0x00;
}
return box(types.trex, result);
};
trun = function(track, offset) {
......
......@@ -16,12 +16,29 @@
var
TransportPacketStream, TransportParseStream, ElementaryStream, VideoSegmentStream,
Transmuxer, AacStream, H264Stream, NalByteStream,
MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4;
AudioSegmentStream, Transmuxer, AacStream, H264Stream, NalByteStream,
MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE,
ADTS_SAMPLING_FREQUENCIES, mp4;
MP2T_PACKET_LENGTH = 188; // bytes
H264_STREAM_TYPE = 0x1b;
ADTS_STREAM_TYPE = 0x0f;
// Sample rates in Hz. The array position is the sampling frequency index
// that AacStream extracts from the ADTS header to look up `samplerate`.
ADTS_SAMPLING_FREQUENCIES = [
96000,
88200,
64000,
48000,
44100,
32000,
24000,
22050,
16000,
12000,
11025,
8000,
7350
];
mp4 = videojs.mp4;
/**
......@@ -438,6 +455,11 @@ AacStream = function() {
// deliver the AAC frame
this.trigger('data', {
channelcount: ((buffer[i + 1] & 1) << 3) |
((buffer[i + 2] & 0xc0) >> 6),
samplerate: ADTS_SAMPLING_FREQUENCIES[(buffer[i + 1] & 0x3c) >> 2],
// assume ISO/IEC 14496-12 AudioSampleEntry default of 16
samplesize: 16,
data: buffer.subarray(i + 6, i + frameLength - 1)
});
......@@ -457,6 +479,62 @@ AacStream = function() {
AacStream.prototype = new videojs.Hls.Stream();
/**
 * Constructs a single-track, ISO BMFF media segment from AAC data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} the audio track metadata. Its `samples`
 * property is replaced each time end() produces a segment.
 */
// TODO: share common code with VideoSegmentStream
AudioSegmentStream = function(track) {
  var aacFrames = [], aacFramesLength = 0, sequenceNumber = 0;
  AudioSegmentStream.prototype.init.call(this);

  /**
   * Buffers one AAC frame event until end() is called.
   * @param data {object} an event whose `data` property holds the
   * frame bytes
   */
  this.push = function(data) {
    aacFrames.push(data);
    aacFramesLength += data.data.byteLength;
  };

  /**
   * Flushes the buffered frames as a single moof + mdat pair and
   * triggers a 'data' event with the result. Does nothing if no audio
   * bytes have been buffered.
   */
  this.end = function() {
    var boxes, currentFrame, data, sample, offset, mdat, moof;
    // return early if no audio data has been observed
    if (aacFramesLength === 0) {
      return;
    }

    // concatenate the audio data to construct the mdat
    data = new Uint8Array(aacFramesLength);
    track.samples = [];
    // the write position must start at zero; an undefined offset would
    // turn into NaN after the first frame
    offset = 0;
    // drain the frame queue itself; aacFramesLength is a byte count and
    // has no `length` property
    while (aacFrames.length) {
      currentFrame = aacFrames.shift();
      sample = {
        size: currentFrame.data.byteLength,
        duration: 1024 // FIXME calculate for realz
      };
      track.samples.push(sample);

      data.set(currentFrame.data, offset);
      offset += currentFrame.data.byteLength;
    }
    aacFramesLength = 0;

    mdat = mp4.mdat(data);
    moof = mp4.moof(sequenceNumber, [track]);
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    this.trigger('data', boxes);
  };
};
AudioSegmentStream.prototype = new videojs.Hls.Stream();
/**
* Accepts a NAL unit byte stream and unpacks the embedded NAL units.
*/
NalByteStream = function() {
......@@ -539,7 +617,7 @@ NalByteStream = function() {
this.end = function() {
// deliver the last buffered NAL unit
if (buffer.byteLength > 3) {
if (buffer && buffer.byteLength > 3) {
this.trigger('data', buffer.subarray(syncPoint + 3));
}
};
......@@ -763,12 +841,19 @@ VideoSegmentStream = function(track) {
this.end = function() {
var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample;
// return early if no video data has been observed
if (nalUnitsLength === 0) {
return;
}
// concatenate the video data and construct the mdat
// first, we have to build the index from byte locations to
// samples (that is, frames) in the video data
data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length));
view = new DataView(data.buffer);
track.samples = [];
// see ISO/IEC 14496-12:2012, section 8.6.4.3
sample = {
size: 0,
flags: {
......@@ -853,11 +938,14 @@ VideoSegmentStream.prototype = new videojs.Hls.Stream();
Transmuxer = function() {
var
self = this,
track,
videoTrack,
audioTrack,
config,
pps,
packetStream, parseStream, elementaryStream, aacStream, h264Stream, videoSegmentStream;
packetStream, parseStream, elementaryStream,
aacStream, h264Stream,
videoSegmentStream, audioSegmentStream;
Transmuxer.prototype.init.call(this);
......@@ -880,51 +968,78 @@ Transmuxer = function() {
!config) {
config = data.config;
track.width = config.width;
track.height = config.height;
track.sps = [data.data];
track.profileIdc = config.profileIdc;
track.levelIdc = config.levelIdc;
track.profileCompatibility = config.profileCompatibility;
videoTrack.width = config.width;
videoTrack.height = config.height;
videoTrack.sps = [data.data];
videoTrack.profileIdc = config.profileIdc;
videoTrack.levelIdc = config.levelIdc;
videoTrack.profileCompatibility = config.profileCompatibility;
// generate an init segment once all the metadata is available
if (pps) {
self.trigger('data', {
data: videojs.mp4.initSegment([track])
type: 'video',
data: videojs.mp4.initSegment([videoTrack])
});
}
}
if (data.nalUnitType === 'pic_parameter_set_rbsp' &&
!pps) {
pps = data.data;
track.pps = [data.data];
videoTrack.pps = [data.data];
if (config) {
self.trigger('data', {
data: videojs.mp4.initSegment([track])
type: 'video',
data: videojs.mp4.initSegment([videoTrack])
});
}
}
});
// hook up the video segment stream once track metadata is delivered
// generate an init segment based on the first audio sample
aacStream.on('data', function(data) {
if (audioTrack && audioTrack.channelcount === undefined) {
audioTrack.channelcount = data.channelcount;
audioTrack.samplerate = data.samplerate;
audioTrack.samplesize = data.samplesize;
self.trigger('data', {
type: 'audio',
data: videojs.mp4.initSegment([audioTrack])
});
}
});
// hook up the segment streams once track metadata is delivered
elementaryStream.on('data', function(data) {
var i, triggerData = function(segment) {
var i, triggerData = function(type) {
return function(segment) {
self.trigger('data', {
type: type,
data: segment
});
};
};
if (data.type === 'metadata') {
i = data.tracks.length;
// scan the tracks listed in the metadata
while (i--) {
if (data.tracks[i].type === 'video') {
track = data.tracks[i];
if (!videoSegmentStream) {
videoSegmentStream = new VideoSegmentStream(track);
// hook up the video segment stream to the first track with h264 data
if (data.tracks[i].type === 'video' && !videoSegmentStream) {
videoTrack = data.tracks[i];
videoSegmentStream = new VideoSegmentStream(videoTrack);
h264Stream.pipe(videoSegmentStream);
videoSegmentStream.on('data', triggerData);
}
videoSegmentStream.on('data', triggerData('video'));
break;
}
// hook up the audio segment stream to the first track with aac data
if (data.tracks[i].type === 'audio' && !audioSegmentStream) {
audioTrack = data.tracks[i];
audioSegmentStream = new AudioSegmentStream(audioTrack);
aacStream.pipe(audioSegmentStream);
audioSegmentStream.on('data', triggerData('audio'));
}
}
}
});
......@@ -938,6 +1053,7 @@ Transmuxer = function() {
elementaryStream.end();
h264Stream.end();
videoSegmentStream.end();
audioSegmentStream.end();
};
};
Transmuxer.prototype = new videojs.Hls.Stream();
......
......@@ -22,7 +22,11 @@
*/
var
mp4 = videojs.mp4,
inspectMp4 = videojs.inspectMp4;
inspectMp4 = videojs.inspectMp4,
validateMvhd, validateTrak, validateTkhd, validateMdia,
validateMdhd, validateHdlr, validateMinf, validateDinf,
validateStbl, validateStsd, validateMvex,
validateVideoSample, validateAudioSample;
module('MP4 Generator');
......@@ -39,62 +43,67 @@ test('generates a BSMFF ftyp', function() {
equal(boxes[0].minorVersion, 1, 'minor version is one');
});
test('generates a moov', function() {
var boxes, mvhd, tkhd, mdhd, hdlr, minf, mvex,
data = mp4.moov([{
id: 7,
duration: 100,
width: 600,
height: 300,
type: 'video',
profileIdc: 3,
levelIdc: 5,
profileCompatibility: 7,
sps: [new Uint8Array([0, 1, 2]), new Uint8Array([3, 4, 5])],
pps: [new Uint8Array([6, 7, 8])]
}]);
ok(data, 'box is not null');
boxes = inspectMp4(data);
equal(boxes.length, 1, 'generated a single box');
equal(boxes[0].type, 'moov', 'generated a moov type');
equal(boxes[0].size, data.byteLength, 'generated size');
equal(boxes[0].boxes.length, 3, 'generated three sub boxes');
mvhd = boxes[0].boxes[0];
validateMvhd = function(mvhd) {
equal(mvhd.type, 'mvhd', 'generated a mvhd');
equal(mvhd.duration, 0xffffffff, 'wrote the maximum movie header duration');
equal(mvhd.nextTrackId, 0xffffffff, 'wrote the max next track id');
};
validateTrak = function(trak, expected) {
expected = expected || {};
equal(trak.type, 'trak', 'generated a trak');
equal(trak.boxes.length, 2, 'generated two track sub boxes');
equal(boxes[0].boxes[1].type, 'trak', 'generated a trak');
equal(boxes[0].boxes[1].boxes.length, 2, 'generated two track sub boxes');
tkhd = boxes[0].boxes[1].boxes[0];
validateTkhd(trak.boxes[0], expected);
validateMdia(trak.boxes[1], expected);
};
validateTkhd = function(tkhd, expected) {
equal(tkhd.type, 'tkhd', 'generated a tkhd');
equal(tkhd.trackId, 7, 'wrote the track id');
deepEqual(tkhd.flags, new Uint8Array([0, 0, 7]), 'flags should equal 7');
equal(tkhd.duration, 100, 'wrote duration into the track header');
equal(tkhd.width, 600, 'wrote width into the track header');
equal(tkhd.height, 300, 'wrote height into the track header');
equal(boxes[0].boxes[1].boxes[1].type, 'mdia', 'generated an mdia type');
equal(boxes[0].boxes[1].boxes[1].boxes.length, 3, 'generated three track media sub boxes');
mdhd = boxes[0].boxes[1].boxes[1].boxes[0];
equal(tkhd.duration,
expected.duration || Math.pow(2, 32) - 1,
'wrote duration into the track header');
equal(tkhd.width, expected.width || 0, 'wrote width into the track header');
equal(tkhd.height, expected.height || 0, 'wrote height into the track header');
equal(tkhd.volume, 1, 'set volume to 1');
};
validateMdia = function(mdia, expected) {
equal(mdia.type, 'mdia', 'generated an mdia type');
equal(mdia.boxes.length, 3, 'generated three track media sub boxes');
validateMdhd(mdia.boxes[0], expected);
validateHdlr(mdia.boxes[1], expected);
validateMinf(mdia.boxes[2], expected);
};
validateMdhd = function(mdhd, expected) {
equal(mdhd.type, 'mdhd', 'generate an mdhd type');
equal(mdhd.language, 'und', 'wrote undetermined language');
equal(mdhd.duration, 100, 'wrote duration into the media header');
equal(mdhd.timescale, expected.timescale || 90000, 'wrote the timescale');
equal(mdhd.duration,
expected.duration || Math.pow(2, 32) - 1,
'wrote duration into the media header');
};
hdlr = boxes[0].boxes[1].boxes[1].boxes[1];
validateHdlr = function(hdlr, expected) {
equal(hdlr.type, 'hdlr', 'generate an hdlr type');
if (expected.type !== 'audio') {
equal(hdlr.handlerType, 'vide', 'wrote a video handler');
equal(hdlr.name, 'VideoHandler', 'wrote the handler name');
} else {
equal(hdlr.handlerType, 'soun', 'wrote a sound handler');
equal(hdlr.name, 'SoundHandler', 'wrote the sound handler name');
}
};
minf = boxes[0].boxes[1].boxes[1].boxes[2];
validateMinf = function(minf, expected) {
equal(minf.type, 'minf', 'generate an minf type');
equal(minf.boxes.length, 3, 'generates three minf sub boxes');
equal(minf.boxes[0].type, 'vmhd', 'generates a vmhd type');
if (expected.type !== 'audio') {
deepEqual({
type: 'vmhd',
size: 20,
......@@ -103,8 +112,21 @@ test('generates a moov', function() {
graphicsmode: 0,
opcolor: new Uint16Array([0, 0, 0])
}, minf.boxes[0], 'generates a vhmd');
} else {
deepEqual({
type: 'smhd',
size: 16,
version: 0,
flags: new Uint8Array([0, 0, 0]),
balance: 0
}, minf.boxes[0], 'generates an smhd');
}
validateDinf(minf.boxes[1]);
validateStbl(minf.boxes[2], expected);
};
equal(minf.boxes[1].type, 'dinf', 'generates a dinf type');
validateDinf = function(dinf) {
deepEqual({
type: 'dinf',
size: 36,
......@@ -120,18 +142,57 @@ test('generates a moov', function() {
flags: new Uint8Array([0, 0, 1])
}]
}]
}, minf.boxes[1], 'generates a dinf');
}, dinf, 'generates a dinf');
};
equal(minf.boxes[2].type, 'stbl', 'generates an stbl type');
validateStbl = function(stbl, expected) {
equal(stbl.type, 'stbl', 'generates an stbl type');
equal(stbl.boxes.length, 5, 'generated five stbl child boxes');
validateStsd(stbl.boxes[0], expected);
deepEqual({
type: 'stbl',
size: 228,
boxes: [{
type: 'stsd',
size: 152,
type: 'stts',
size: 16,
version: 0,
flags: new Uint8Array([0, 0, 0]),
timeToSamples: []
}, stbl.boxes[1], 'generated an stts');
deepEqual({
type: 'stsc',
size: 16,
version: 0,
flags: new Uint8Array([0, 0, 0]),
sampleToChunks: []
}, stbl.boxes[2], 'generated an stsc');
deepEqual({
type: 'stsz',
version: 0,
size: 20,
flags: new Uint8Array([0, 0, 0]),
sampleSize: 0,
entries: []
}, stbl.boxes[3], 'generated an stsz');
deepEqual({
type: 'stco',
size: 16,
version: 0,
flags: new Uint8Array([0, 0, 0]),
sampleDescriptions: [{
chunkOffsets: []
}, stbl.boxes[4], 'generated and stco');
};
validateStsd = function(stsd, expected) {
equal(stsd.type, 'stsd', 'generated an stsd');
equal(stsd.sampleDescriptions.length, 1, 'generated one sample');
if (expected.type !== 'audio') {
validateVideoSample(stsd.sampleDescriptions[0]);
} else {
validateAudioSample(stsd.sampleDescriptions[0]);
}
};
validateVideoSample = function(sample) {
deepEqual(sample, {
type: 'avc1',
size: 136,
dataReferenceIndex: 1,
......@@ -164,38 +225,40 @@ test('generates a moov', function() {
maxBitrate: 3000000,
avgBitrate: 3000000
}]
}]
}, {
type: 'stts',
size: 16,
version: 0,
flags: new Uint8Array([0, 0, 0]),
timeToSamples: []
}, {
type: 'stsc',
size: 16,
version: 0,
flags: new Uint8Array([0, 0, 0]),
sampleToChunks: []
}, {
type: 'stsz',
version: 0,
size: 20,
flags: new Uint8Array([0, 0, 0]),
sampleSize: 0,
entries: []
}, {
type: 'stco',
size: 16,
}, 'generated a video sample');
};
validateAudioSample = function(sample) {
deepEqual(sample, {
type: 'mp4a',
size: 75,
dataReferenceIndex: 1,
channelcount: 2,
samplesize: 16,
samplerate: 48000,
streamDescriptor: {
type: 'esds',
version: 0,
flags: new Uint8Array([0, 0, 0]),
chunkOffsets: []
}]
}, minf.boxes[2], 'generates a stbl');
mvex = boxes[0].boxes[2];
equal(mvex.type, 'mvex', 'generates an mvex type');
size: 39,
esId: 0,
streamPriority: 0,
// these values were hard-coded based on a working audio init segment
decoderConfig: {
avgBitrate: 56000,
maxBitrate: 56000,
bufferSize: 1536,
objectProfileIndication: 64,
streamType: 5
}
}
}, 'generated an audio sample');
};
validateMvex = function(mvex, options) {
options = options || {
sampleDegradationPriority: 1
};
deepEqual({
type: 'mvex',
size: 40,
......@@ -213,17 +276,75 @@ test('generates a moov', function() {
sampleHasRedundancy: 0,
samplePaddingValue: 0,
sampleIsDifferenceSample: true,
sampleDegradationPriority: 1
sampleDegradationPriority: options.sampleDegradationPriority
}]
}, mvex, 'writes a movie extends box');
};
// Generates a moov for a single video track and checks the mvhd, trak and
// mvex children with the shared validate* helpers.
test('generates a video moov', function() {
var
boxes,
data = mp4.moov([{
id: 7,
duration: 100,
width: 600,
height: 300,
type: 'video',
profileIdc: 3,
levelIdc: 5,
profileCompatibility: 7,
sps: [new Uint8Array([0, 1, 2]), new Uint8Array([3, 4, 5])],
pps: [new Uint8Array([6, 7, 8])]
}]);
ok(data, 'box is not null');
boxes = inspectMp4(data);
equal(boxes.length, 1, 'generated a single box');
equal(boxes[0].type, 'moov', 'generated a moov type');
equal(boxes[0].size, data.byteLength, 'generated size');
equal(boxes[0].boxes.length, 3, 'generated three sub boxes');
validateMvhd(boxes[0].boxes[0]);
// no `type` is passed, so the helpers take their non-audio branches
validateTrak(boxes[0].boxes[1], {
duration: 100,
width: 600,
height: 300
});
// no options are passed, so the helper's own defaults are used
validateMvex(boxes[0].boxes[2]);
});
// Generates a moov for a single audio track and checks the mvhd, trak and
// mvex children with the shared validate* helpers.
test('generates an audio moov', function() {
var
data = mp4.moov([{
id: 7,
type: 'audio',
channelcount: 2,
samplerate: 48000,
samplesize: 16
}]),
boxes;
ok(data, 'box is not null');
boxes = inspectMp4(data);
equal(boxes.length, 1, 'generated a single box');
equal(boxes[0].type, 'moov', 'generated a moov type');
equal(boxes[0].size, data.byteLength, 'generated size');
equal(boxes[0].boxes.length, 3, 'generated three sub boxes');
validateMvhd(boxes[0].boxes[0]);
// the expected media header timescale equals the track's samplerate
validateTrak(boxes[0].boxes[1], {
type: 'audio',
timescale: 48000
});
// audio tracks are expected to carry a degradation priority of zero
validateMvex(boxes[0].boxes[2], {
sampleDegradationPriority: 0
});
});
test('generates a sound hdlr', function() {
var boxes, hdlr,
data = mp4.moov([{
duration:100,
width: 600,
height: 300,
type: 'audio'
}]);
......
......@@ -586,6 +586,75 @@ test('can parse a video stsd', function() {
}]);
});
// Builds an stsd holding a single mp4a sample entry with a nested esds box
// and checks the structure reported by videojs.inspectMp4.
test('can parse an audio stsd', function() {
var data = box('stsd',
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x01, // entry_count
box('mp4a',
0x00, 0x00, 0x00,
0x00, 0x00, 0x00, // reserved
0x00, 0x01, // data_reference_index
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, // reserved
0x00, 0x02, // channelcount
0x00, 0x10, // samplesize
0x00, 0x00, // pre_defined
0x00, 0x00, // reserved
0xbb, 0x80, 0x00, 0x00, // samplerate, fixed-point 16.16 (0xbb80 = 48000)
box('esds',
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x03, // tag, ES_DescrTag
0x00, // length (left at zero in this fixture)
0x00, 0x01, // ES_ID
0x00, // streamDependenceFlag, URL_Flag, reserved, streamPriority
// DecoderConfigDescriptor
0x04, // tag, DecoderConfigDescrTag
0x0d, // length
// objectProfileIndication. NOTE(review): 0x40 appears to denote
// MPEG-4 audio (ISO/IEC 14496-3) in general rather than AAC Main
// specifically -- confirm against the object type registry.
0x40,
0x15, // streamType, AudioStream. upstream, reserved
0x00, 0x00, 0xff, // bufferSizeDB
0x00, 0x00, 0x00, 0xff, // maxBitrate
0x00, 0x00, 0x00, 0xaa, // avgBitrate
// DecoderSpecificInfo
0x05, // tag, DecoderSpecificInfoTag
0x02, // length
// two bytes of decoder specific info, then three trailing bytes
0x11, 0x90, 0x06, 0x01, 0x02)));
deepEqual(videojs.inspectMp4(new Uint8Array(data)), [{
version: 0,
flags: new Uint8Array([0, 0, 0]),
type: 'stsd',
size: 91,
sampleDescriptions: [{
type: 'mp4a',
dataReferenceIndex: 1,
channelcount: 2,
samplesize: 16,
samplerate: 48000,
size: 75,
streamDescriptor: {
type: 'esds',
version: 0,
size: 39,
flags: new Uint8Array([0, 0, 0]),
esId: 1,
streamPriority: 0,
decoderConfig: {
objectProfileIndication: 0x40,
streamType: 0x05,
bufferSize: 0xff,
maxBitrate: 0xff,
avgBitrate: 0xaa
}
}
}]
}], 'parsed an audio stsd');
});
test('can parse an styp', function() {
deepEqual(videojs.inspectMp4(new Uint8Array(box('styp',
0x61, 0x76, 0x63, 0x31, // major brand
......@@ -845,6 +914,24 @@ test('can parse a sidx', function(){
}]);
});
// Round-trips a sound media header through the inspector and checks that
// the 8.8 fixed-point balance is converted to a fraction.
test('can parse an smhd', function() {
  var
    smhdBytes = box('smhd',
                    0x00, // version
                    0x00, 0x00, 0x00, // flags
                    0x00, 0xff, // balance, fixed-point 8.8
                    0x00, 0x00), // reserved
    parsed = videojs.inspectMp4(new Uint8Array(smhdBytes)),
    expected = [{
      type: 'smhd',
      size: 16,
      version: 0,
      flags: new Uint8Array([0, 0, 0]),
      balance: 0xff / Math.pow(2, 8)
    }];

  deepEqual(parsed, expected, 'parsed an smhd');
});
test('can parse a tfdt', function() {
var data = box('tfdt',
0x00, // version
......
......@@ -129,6 +129,27 @@ var
avgBitrate: view.getUint32(8)
};
},
esds: function(data) {
return {
version: data[0],
flags: new Uint8Array(data.subarray(1, 4)),
esId: (data[6] << 8) | data[7],
streamPriority: data[8] & 0x1f,
decoderConfig: {
objectProfileIndication: data[11],
streamType: (data[12] >>> 2) & 0x3f,
bufferSize: (data[13] << 16) | (data[14] << 8) | data[15],
maxBitrate: (data[16] << 24) |
(data[17] << 16) |
(data[18] << 8) |
data[19],
avgBitrate: (data[20] << 24) |
(data[21] << 16) |
(data[22] << 8) |
data[23]
}
};
},
ftyp: function(data) {
var
view = new DataView(data.buffer, data.byteOffset, data.byteLength),
......@@ -247,6 +268,30 @@ var
boxes: videojs.inspectMp4(data)
};
},
// codingname, not a first-class box type. stsd entries share the
// same format as real boxes so the parsing infrastructure can be
// shared
mp4a: function(data) {
var
view = new DataView(data.buffer, data.byteOffset, data.byteLength),
result = {
// 6 bytes reserved
dataReferenceIndex: view.getUint16(6),
// 4 + 4 bytes reserved
channelcount: view.getUint16(16),
samplesize: view.getUint16(18),
// 2 bytes pre_defined
// 2 bytes reserved
samplerate: view.getUint16(24) + (view.getUint16(26) / 65536)
};
// if there are more bytes to process, assume this is an ISO/IEC
// 14496-14 MP4AudioSampleEntry and parse the ESDBox
if (data.byteLength > 28) {
result.streamDescriptor = videojs.inspectMp4(data.subarray(28))[0];
}
return result;
},
moof: function(data) {
return {
boxes: videojs.inspectMp4(data)
......@@ -357,6 +402,13 @@ var
return result;
},
smhd: function(data) {
return {
version: data[0],
flags: new Uint8Array(data.subarray(1, 4)),
balance: data[4] + (data[5] / 256)
};
},
stbl: function(data) {
return {
boxes: videojs.inspectMp4(data)
......
......@@ -181,8 +181,8 @@
mediaSource.addEventListener('sourceopen', function() {
var
buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
one = false;
// buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d');
buffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2');
buffer.addEventListener('updatestart', logevent);
buffer.addEventListener('updateend', logevent);
buffer.addEventListener('error', logevent);
......@@ -211,27 +211,43 @@
var segment = new Uint8Array(reader.result),
transmuxer = new videojs.mp2t.Transmuxer(),
events = [],
i = 0,
bytesLength = 0,
init = false,
bytes,
hex = '';
transmuxer.on('data', function(data) {
if (data) {
if (data && data.type === 'audio') {
events.push(data.data);
bytesLength += data.data.byteLength;
// XXX Media Sources Testing
if (!init) {
vjsParsed = videojs.inspectMp4(data.data);
console.log('appended tmuxed output');
window.vjsSourceBuffer.appendBuffer(data.data);
init = true;
}
}
});
transmuxer.push(segment);
transmuxer.end();
bytes = new Uint8Array(events[0].byteLength + events[1].byteLength);
bytes.set(events[0]);
bytes.set(events[1], events[0].byteLength);
bytes = new Uint8Array(bytesLength);
i = 0;
while (events.length) {
bytes.set(events[0], i);
i += events[0].byteLength;
events.shift();
}
vjsParsed = videojs.inspectMp4(bytes);
console.log('transmuxed', vjsParsed);
// vjsParsed = videojs.inspectMp4(bytes);
console.log('transmuxed', videojs.inspectMp4(bytes));
diffParsed();
// clear old box info
vjsBoxes.innerHTML = stringify(vjsParsed, null, ' ');
vjsBoxes.innerHTML = stringify(videojs.inspectMp4(bytes), null, ' ');
// write out the result
hex += '<pre>';
......@@ -263,8 +279,7 @@
workingOutput.innerHTML = hex;
// XXX Media Sources Testing
window.vjsSourceBuffer.appendBuffer(bytes);
console.log('appended bytes');
// window.vjsSourceBuffer.appendBuffer(bytes);
});
reader.readAsArrayBuffer(this.files[0]);
}, false);
......
......@@ -76,27 +76,41 @@
// setup the media source
mediaSource.addEventListener('sourceopen', function() {
var buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
var videoBuffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
audioBuffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2'),
transmuxer = new videojs.mp2t.Transmuxer(),
segments = [];
videoSegments = [],
audioSegments = [];
// expose the machinery for debugging
window.vjsMediaSource = mediaSource;
window.vjsSourceBuffer = buffer;
window.vjsSourceBuffer = videoBuffer;
window.vjsVideo = demo;
// transmux the MPEG-TS data to BMFF segments
transmuxer.on('data', function(segment) {
segments.push(segment);
if (segment.type === 'video') {
videoSegments.push(segment);
} else {
audioSegments.push(segment);
}
});
transmuxer.push(hazeVideo);
transmuxer.end();
// buffer up the video data
buffer.appendBuffer(segments.shift().data);
buffer.addEventListener('updateend', function() {
if (segments.length) {
buffer.appendBuffer(segments.shift().data);
videoBuffer.appendBuffer(videoSegments.shift().data);
videoBuffer.addEventListener('updateend', function() {
if (videoSegments.length) {
videoBuffer.appendBuffer(videoSegments.shift().data);
}
});
// buffer up the audio data
audioBuffer.appendBuffer(audioSegments.shift().data);
audioBuffer.addEventListener('updateend', function() {
if (audioSegments.length) {
audioBuffer.appendBuffer(audioSegments.shift().data);
}
});
});
......
......@@ -94,8 +94,10 @@
var onMediaSourceOpen = function() {
console.log('on media open');
ms.removeEventListener('sourceopen', onMediaSourceOpen);
var sourceBuffer = ms.addSourceBuffer('video/mp4;codecs="avc1.4D400D"');
sourceBuffer.appendBuffer(bytes);
var videoBuffer = ms.addSourceBuffer('video/mp4;codecs="avc1.4D400D"');
videoBuffer.appendBuffer(bytes);
var audioBuffer = ms.addSourceBuffer('audio/mp4;codecs=mp4a.40.2');
};
ms.addEventListener('sourceopen', onMediaSourceOpen);
......
......@@ -47,7 +47,9 @@ var
validateTrack,
validateTrackFragment,
videoPes;
transportPacket,
videoPes,
audioPes;
module('MP2T Packet Stream', {
setup: function() {
......@@ -397,15 +399,22 @@ test('parses an elementary stream packet with a pts and dts', function() {
equal(2 / 90, packet.dts, 'parsed the dts');
});
// helper function to create video PES packets
videoPes = function(data, first) {
/**
 * Helper function to create transport stream PES packets
 * @param pid {uint8} - the program identifier (PID)
 * @param data {arraylike} - the payload bytes
 * @param first {boolean} - true if this PES should be a payload
 * unit start
 */
transportPacket = function(pid, data, first) {
var
adaptationFieldLength = 188 - data.length - (first ? 18 : 17),
adaptationFieldLength = 188 - data.length - (first ? 15 : 14),
// transport_packet(), Rec. ITU-T H.222.0, Table 2-2
result = [
// sync byte
0x47,
// tei:0 pusi:1 tp:0 pid: high five bits 0 0000, low byte from the pid argument
0x40, 0x11,
0x40, pid,
// tsc:01 afc:11 cc:0000
0x70
].concat([
......@@ -422,6 +431,7 @@ videoPes = function(data, first) {
result.push(0xff);
}
// PES_packet(), Rec. ITU-T H.222.0, Table 2-21
result = result.concat([
// pscp:0000 0000 0000 0000 0000 0001
0x00, 0x00, 0x01,
......@@ -437,14 +447,41 @@ videoPes = function(data, first) {
if (first) {
result.push(0x00);
}
result = result.concat([
return result.concat(data);
};
/**
 * Helper function to create video PES packets
 * @param data {arraylike} - the payload bytes
 * @param first {boolean} - true if this PES should be a payload
 * unit start
 * @return the result of transportPacket() for PID 0x11 with the payload
 * prefixed by a NAL unit start code
 */
videoPes = function(data, first) {
  // delegate the packet framing to transportPacket; only the start code
  // prefix is specific to video payloads
  return transportPacket(0x11, [
    // NAL unit start code
    0x00, 0x00, 0x01
  ].concat(data), first);
};
standalonePes = videoPes([0xaf, 0x01], true);
/**
 * Builds an audio PES packet: the payload is wrapped in a seven byte
 * ADTS header and handed to transportPacket() on PID 0x12.
 * @param data {arraylike} - the AAC payload bytes
 * @param first {boolean} - true if this PES should be a payload
 * unit start
 * @return the result of transportPacket() for the ADTS-framed payload
 */
audioPes = function(data, first) {
  var
    // the ADTS frame length counts the seven header bytes built below
    // in addition to the payload
    totalLength = 7 + data.length,
    // the 13-bit frame length is split across three header bytes
    lengthHigh = (totalLength >> 11) & 0x03,
    lengthMiddle = (totalLength >> 3) & 0xff,
    lengthLow = (totalLength & 0x07) << 5,
    adtsHeader = [
      0xff, 0xf1, // no CRC
      0x10, // AAC Main, 44.1KHz
      0xb0 | lengthHigh, // 2 channels
      lengthMiddle,
      lengthLow | 0x07, // frame length in bytes
      0x00 // one AAC per ADTS frame
    ];

  return transportPacket(0x12, adtsHeader.concat(data), first);
};
test('parses an elementary stream packet without a pts or dts', function() {
var packet;
......@@ -951,8 +988,8 @@ test('generates AAC frame events from ADTS bytes', function() {
type: 'audio',
data: new Uint8Array([
0xff, 0xf1, // no CRC
0x00, // AAC Main, 44.1KHz
0xfc, 0x01, 0x20, // frame length 9 bytes
0x10, // AAC Main, 44.1KHz
0xbc, 0x01, 0x20, // 2 channels, frame length 9 bytes
0x00, // one AAC per ADTS frame
0x12, 0x34, // AAC payload
0x56, 0x78 // extra junk that should be ignored
......@@ -961,6 +998,13 @@ test('generates AAC frame events from ADTS bytes', function() {
equal(frames.length, 1, 'generated one frame');
deepEqual(frames[0].data, new Uint8Array([0x12, 0x34]), 'extracted AAC frame');
equal(frames[0].channelcount, 2, 'parsed channelcount');
equal(frames[0].samplerate, 44100, 'parsed samplerate');
// Chrome only supports 8, 16, and 32 bit sample sizes. Assuming the
// default value of 16 in ISO/IEC 14496-12 AudioSampleEntry is
// acceptable.
equal(frames[0].samplesize, 16, 'parsed samplesize');
});
// not handled: ADTS with CRC
......@@ -972,7 +1016,7 @@ module('Transmuxer', {
}
});
test('generates an init segment', function() {
test('generates a video init segment', function() {
var segments = [];
transmuxer.on('data', function(segment) {
segments.push(segment);
......@@ -980,16 +1024,38 @@ test('generates an init segment', function() {
transmuxer.push(packetize(PAT));
transmuxer.push(packetize(PMT));
transmuxer.push(packetize(videoPes([
0x07,
0x08, 0x01 // pic_parameter_set_rbsp
], true)));
transmuxer.push(packetize(videoPes([
0x07, // seq_parameter_set_rbsp
0x27, 0x42, 0xe0, 0x0b,
0xa9, 0x18, 0x60, 0x9d,
0x80, 0x53, 0x06, 0x01,
0x06, 0xb6, 0xc2, 0xb5,
0xef, 0x7c, 0x04
], false)));
transmuxer.end();
equal(segments.length, 2, 'generated init and media segments');
ok(segments[0].data, 'wrote data in the init segment');
equal(segments[0].type, 'video', 'video is the segment type');
});
// Pushes a PAT, a PMT and a single audio PES packet through the
// transmuxer and checks the 'data' events it emits once ended: two
// segments are expected, the first of which carries data and is typed
// as audio.
test('generates an audio init segment', function() {
  var segments = [];
  transmuxer.on('data', function(segment) {
    segments.push(segment);
  });
  transmuxer.push(packetize(PAT));
  transmuxer.push(packetize(PMT));
  transmuxer.push(packetize(audioPes([
    0x00, 0x01
  ], true)));
  transmuxer.end();

  // a single expectation for the segment count: the earlier "1 segment"
  // assertion contradicted this one, so one of the two always failed
  equal(segments.length, 2, 'generated init and media segments');
  ok(segments[0].data, 'wrote data in the init segment');
  equal(segments[0].type, 'audio', 'audio is the segment type');
});
test('buffers video samples until ended', function() {
......@@ -1123,20 +1189,26 @@ validateTrackFragment = function(track, segment, metadata) {
test('parses an example mp2t file and generates media segments', function() {
var
segments = [],
videoSegments = [],
audioSegments = [],
sequenceNumber = window.Infinity,
i, boxes, mfhd;
transmuxer.on('data', function(segment) {
segments.push(segment);
if (segment.type === 'video') {
videoSegments.push(segment);
} else if (segment.type === 'audio') {
audioSegments.push(segment);
}
});
transmuxer.push(window.bcSegment);
transmuxer.end();
equal(segments.length, 2, 'generated two segments');
equal(videoSegments.length, 2, 'generated two video segments');
equal(audioSegments.length, 2, 'generated two audio segments');
boxes = videojs.inspectMp4(segments[0].data);
equal(boxes.length, 2, 'init segments are composed of two boxes');
boxes = videojs.inspectMp4(videoSegments[0].data);
equal(boxes.length, 2, 'video init segments are composed of two boxes');
equal(boxes[0].type, 'ftyp', 'the first box is an ftyp');
equal(boxes[1].type, 'moov', 'the second box is a moov');
equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd');
......@@ -1150,9 +1222,9 @@ test('parses an example mp2t file and generates media segments', function() {
// });
// equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex');
boxes = videojs.inspectMp4(segments[1].data);
ok(boxes.length > 0, 'media segments are not empty');
ok(boxes.length % 2 === 0, 'media segments are composed of pairs of boxes');
boxes = videojs.inspectMp4(videoSegments[1].data);
ok(boxes.length > 0, 'video media segments are not empty');
ok(boxes.length % 2 === 0, 'video media segments are composed of pairs of boxes');
for (i = 0; i < boxes.length; i += 2) {
equal(boxes[i].type, 'moof', 'first box is a moof');
equal(boxes[i].boxes.length, 2, 'the moof has two children');
......@@ -1163,7 +1235,7 @@ test('parses an example mp2t file and generates media segments', function() {
sequenceNumber = mfhd.sequenceNumber;
equal(boxes[i + 1].type, 'mdat', 'second box is an mdat');
validateTrackFragment(boxes[i].boxes[1], segments[1].data, {
validateTrackFragment(boxes[i].boxes[1], videoSegments[1].data, {
trackId: 256,
width: 388,
height: 300,
......