def510ae by David LaPalomento

Generate a valid audio initialization segment

Modify the mp4 generator to inspect audio tracks and generate a working initialization segment. Hook the audio init segment up to the mp4 transmuxing test page.
1 parent 458da175
......@@ -4,7 +4,7 @@
var box, dinf, ftyp, mdat, mfhd, minf, moof, moov, mvex, mvhd, trak,
tkhd, mdia, mdhd, hdlr, sdtp, stbl, stsd, styp, traf, trex, trun,
types, MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR,
AUDIO_HDLR, HDLR_TYPES, VMHD, DREF, STCO, STSC, STSZ, STTS,
AUDIO_HDLR, HDLR_TYPES, ESDS, VMHD, SMHD, DREF, STCO, STSC, STSZ, STTS,
Uint8Array, DataView;
Uint8Array = window.Uint8Array;
......@@ -19,6 +19,7 @@ DataView = window.DataView;
btrt: [],
dinf: [],
dref: [],
esds: [],
ftyp: [],
hdlr: [],
mdat: [],
......@@ -28,9 +29,11 @@ DataView = window.DataView;
minf: [],
moof: [],
moov: [],
mp4a: [], // codingname
mvex: [],
mvhd: [],
sdtp: [],
smhd: [],
stbl: [],
stco: [],
stsc: [],
......@@ -109,6 +112,39 @@ DataView = window.DataView;
0x00, // version 0
0x00, 0x00, 0x01 // entry_flags
]);
// Default elementary stream descriptor (esds) payload for AAC audio,
// ISO/IEC 14496-1 section 7.2.6.5
ESDS = new Uint8Array([
  0x00, // version
  0x00, 0x00, 0x00, // flags
  // ES_Descriptor
  0x03, // tag, ES_DescrTag
  0x19, // length
  0x00, 0x00, // ES_ID
  0x00, // streamDependenceFlag, URL_flag, reserved, streamPriority
  // DecoderConfigDescriptor
  0x04, // tag, DecoderConfigDescrTag
  0x11, // length
  0x40, // object type, Audio ISO/IEC 14496-3
  0x15, // streamType, AudioStream
  0x00, 0x06, 0x00, // bufferSizeDB
  0x00, 0x00, 0xda, 0xc0, // maxBitrate
  0x00, 0x00, 0xda, 0xc0, // avgBitrate
  // DecoderSpecificInfo
  0x05, // tag, DecoderSpecificInfoTag
  0x02, // length
  // ISO/IEC 14496-3, AudioSpecificConfig
  // NOTE(review): 0x11 0x90 decodes as audioObjectType 2 (AAC LC),
  // samplingFrequencyIndex 3 (48000 Hz), channelConfiguration 2
  // (stereo); an earlier annotation claimed index 8 -> 16000, which
  // does not match these bytes -- confirm intended sample rate
  0x11, // AudioObjectType (5 bits) + high bits of samplingFrequencyIndex
  0x90, // rest of samplingFrequencyIndex + channelConfiguration
  0x06, 0x01, 0x02 // GASpecificConfig
]);
// Default sound media header (smhd) payload, ISO/IEC 14496-12 section 12.2.2
SMHD = new Uint8Array([
  0x00, // version
  0x00, 0x00, 0x00, // flags
  0x00, 0x00, // balance, 0 means centered
  0x00, 0x00 // reserved
]);
STCO = new Uint8Array([
0x00, // version
0x00, 0x00, 0x00, // flags
......@@ -171,24 +207,35 @@ hdlr = function(type) {
// Wrap the supplied payload bytes in a media data box (mdat).
mdat = function(bytes) {
  return box(types.mdat, bytes);
};
// Generate a media header box (mdhd) for the given track. The raw diff
// capture interleaved the old duration-based implementation with the new
// track-based one; only the new implementation is kept here so the span
// is valid JavaScript.
mdhd = function(track) {
  var result = new Uint8Array([
    0x00, // version 0
    0x00, 0x00, 0x00, // flags
    0x00, 0x00, 0x00, 0x02, // creation_time
    0x00, 0x00, 0x00, 0x03, // modification_time
    0x00, 0x01, 0x5f, 0x90, // timescale, 90,000 "ticks" per second

    (track.duration >>> 24),
    (track.duration >>> 16) & 0xFF,
    (track.duration >>> 8) & 0xFF,
    track.duration & 0xFF, // duration
    0x55, 0xc4, // 'und' language (undetermined)
    0x00, 0x00
  ]);

  // Use the sample rate from the track metadata, when it is
  // defined. The sample rate can be parsed out of an ADTS header, for
  // instance.
  if (track.samplerate) {
    result[12] = (track.samplerate >>> 24);
    result[13] = (track.samplerate >>> 16) & 0xFF;
    result[14] = (track.samplerate >>> 8) & 0xFF;
    result[15] = (track.samplerate) & 0xFF;
  }

  return box(types.mdhd, result);
};
// Generate a media box (mdia) for the given track. The raw diff capture
// stacked the old and new return statements; only the new, track-based
// call to mdhd() is kept.
mdia = function(track) {
  return box(types.mdia, mdhd(track), hdlr(track.type), minf(track));
};
mfhd = function(sequenceNumber) {
return box(types.mfhd, new Uint8Array([
......@@ -201,7 +248,10 @@ mfhd = function(sequenceNumber) {
]));
};
// Generate a media information box (minf). The raw diff capture kept the
// old unconditional vmhd line next to the new implementation; only the
// new version, which picks vmhd for video and smhd for audio, is kept.
minf = function(track) {
  return box(types.minf,
             track.type === 'video' ? box(types.vmhd, VMHD) : box(types.smhd, SMHD),
             dinf(),
             stbl(track));
};
moof = function(sequenceNumber, tracks) {
var
......@@ -217,7 +267,9 @@ moof = function(sequenceNumber, tracks) {
].concat(trackFragments));
};
/**
* @param tracks... (optional) {array} the tracks associated with this movie
* Returns a movie box.
* @param tracks {array} the tracks associated with this movie
* @see ISO/IEC 14496-12:2012(E), section 8.2.1
*/
moov = function(tracks) {
var
......@@ -307,32 +359,36 @@ stbl = function(track) {
box(types.stco, STCO));
};
stsd = function(track) {
var sequenceParameterSets = [], pictureParameterSets = [], i;
(function() {
var videoSample, audioSample;
if (track.type === 'audio') {
return box(types.stsd);
}
stsd = function(track) {
// assemble the SPSs
for (i = 0; i < track.sps.length; i++) {
sequenceParameterSets.push((track.sps[i].byteLength & 0xFF00) >>> 8);
sequenceParameterSets.push((track.sps[i].byteLength & 0xFF)); // sequenceParameterSetLength
sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(track.sps[i])); // SPS
}
return box(types.stsd, new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x01
]), track.type === 'video' ? videoSample(track) : audioSample(track));
};
// assemble the PPSs
for (i = 0; i < track.pps.length; i++) {
pictureParameterSets.push((track.pps[i].byteLength & 0xFF00) >>> 8);
pictureParameterSets.push((track.pps[i].byteLength & 0xFF));
pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i]));
}
videoSample = function(track) {
var sequenceParameterSets = [], pictureParameterSets = [], i;
return box(types.stsd, new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x01]),
box(types.avc1, new Uint8Array([
// assemble the SPSs
for (i = 0; i < track.sps.length; i++) {
sequenceParameterSets.push((track.sps[i].byteLength & 0xFF00) >>> 8);
sequenceParameterSets.push((track.sps[i].byteLength & 0xFF)); // sequenceParameterSetLength
sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(track.sps[i])); // SPS
}
// assemble the PPSs
for (i = 0; i < track.pps.length; i++) {
pictureParameterSets.push((track.pps[i].byteLength & 0xFF00) >>> 8);
pictureParameterSets.push((track.pps[i].byteLength & 0xFF));
pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i]));
}
return box(types.avc1, new Uint8Array([
0x00, 0x00, 0x00,
0x00, 0x00, 0x00, // reserved
0x00, 0x01, // data_reference_index
......@@ -359,31 +415,60 @@ stsd = function(track) {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, // compressorname
0x00, 0x18, // depth = 24
0x11, 0x11]), // pre_defined = -1
box(types.avcC, new Uint8Array([
0x01, // configurationVersion
track.profileIdc, // AVCProfileIndication
track.profileCompatibility, // profile_compatibility
track.levelIdc, // AVCLevelIndication
0xff // lengthSizeMinusOne, hard-coded to 4 bytes
].concat([
track.sps.length // numOfSequenceParameterSets
]).concat(sequenceParameterSets).concat([
track.pps.length // numOfPictureParameterSets
]).concat(pictureParameterSets))), // "PPS"
box(types.btrt, new Uint8Array([
0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB
0x00, 0x2d, 0xc6, 0xc0, // maxBitrate
0x00, 0x2d, 0xc6, 0xc0])) // avgBitrate
));
};
0x11, 0x11 // pre_defined = -1
]), box(types.avcC, new Uint8Array([
0x01, // configurationVersion
track.profileIdc, // AVCProfileIndication
track.profileCompatibility, // profile_compatibility
track.levelIdc, // AVCLevelIndication
0xff // lengthSizeMinusOne, hard-coded to 4 bytes
].concat([
track.sps.length // numOfSequenceParameterSets
]).concat(sequenceParameterSets).concat([
track.pps.length // numOfPictureParameterSets
]).concat(pictureParameterSets))), // "PPS"
box(types.btrt, new Uint8Array([
0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB
0x00, 0x2d, 0xc6, 0xc0, // maxBitrate
0x00, 0x2d, 0xc6, 0xc0
])) // avgBitrate
);
};
audioSample = function(track) {
return box(types.mp4a, new Uint8Array([
// SampleEntry, ISO/IEC 14496-12
0x00, 0x00, 0x00,
0x00, 0x00, 0x00, // reserved
0x00, 0x01, // data_reference_index
// AudioSampleEntry, ISO/IEC 14496-12
0x00, 0x00, 0x00, 0x00, // reserved
0x00, 0x00, 0x00, 0x00, // reserved
(track.channelcount & 0xff00) >> 8,
(track.channelcount & 0xff), // channelcount
(track.samplesize & 0xff00) >> 8,
(track.samplesize & 0xff), // samplesize
0x00, 0x00, // pre_defined
0x00, 0x00, // reserved
(track.samplerate & 0xff00) >> 8,
(track.samplerate & 0xff),
0x00, 0x00 // samplerate, 16.16
// MP4AudioSampleEntry, ISO/IEC 14496-14
]), box(types.esds, ESDS));
};
})();
// Generate a segment type box (styp); the major brand doubles as the
// sole compatible brand.
styp = function() {
  var segmentType = types.styp;
  return box(segmentType, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND);
};
tkhd = function(track) {
return box(types.tkhd, new Uint8Array([
var result = new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x07, // flags
0x00, 0x00, 0x00, 0x00, // creation_time
......@@ -401,7 +486,7 @@ tkhd = function(track) {
0x00, 0x00, 0x00, 0x00, // reserved
0x00, 0x00, // layer
0x00, 0x00, // alternate_group
0x00, 0x00, // non-audio track volume
0x01, 0x00, // non-audio track volume
0x00, 0x00, // reserved
0x00, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
......@@ -418,7 +503,9 @@ tkhd = function(track) {
(track.height & 0xFF00) >> 8,
track.height & 0xFF,
0x00, 0x00 // height
]));
]);
return box(types.tkhd, result);
};
traf = function(track) {
......@@ -461,7 +548,7 @@ trak = function(track) {
};
trex = function(track) {
return box(types.trex, new Uint8Array([
var result = new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
(track.id & 0xFF000000) >> 24,
......@@ -472,7 +559,16 @@ trex = function(track) {
0x00, 0x00, 0x00, 0x00, // default_sample_duration
0x00, 0x00, 0x00, 0x00, // default_sample_size
0x00, 0x01, 0x00, 0x01 // default_sample_flags
]));
]);
// the last two bytes of default_sample_flags is the sample
// degradation priority, a hint about the importance of this sample
// relative to others. Lower the degradation priority for all sample
// types other than video.
if (track.type !== 'video') {
result[result.length - 1] = 0x00;
}
return box(types.trex, result);
};
trun = function(track, offset) {
......
......@@ -16,12 +16,29 @@
var
TransportPacketStream, TransportParseStream, ElementaryStream, VideoSegmentStream,
Transmuxer, AacStream, H264Stream, NalByteStream,
MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4;
AudioSegmentStream, Transmuxer, AacStream, H264Stream, NalByteStream,
MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE,
ADTS_SAMPLING_FREQUENCIES, mp4;
MP2T_PACKET_LENGTH = 188; // bytes
H264_STREAM_TYPE = 0x1b; // Rec. ITU-T H.222.0 stream_type for AVC video
ADTS_STREAM_TYPE = 0x0f; // Rec. ITU-T H.222.0 stream_type for AAC audio (ADTS)
// sampling frequencies in Hz, indexed by the 4-bit
// samplingFrequencyIndex field of an ADTS header (ISO/IEC 14496-3)
ADTS_SAMPLING_FREQUENCIES = [
  96000,
  88200,
  64000,
  48000,
  44100,
  32000,
  24000,
  22050,
  16000,
  12000,
  11025,
  8000,
  7350
];
mp4 = videojs.mp4;
/**
......@@ -438,6 +455,11 @@ AacStream = function() {
// deliver the AAC frame
this.trigger('data', {
channelcount: ((buffer[i + 1] & 1) << 3) |
((buffer[i + 2] & 0xc0) >> 6),
samplerate: ADTS_SAMPLING_FREQUENCIES[(buffer[i + 1] & 0x3c) >> 2],
// assume ISO/IEC 14496-12 AudioSampleEntry default of 16
samplesize: 16,
data: buffer.subarray(i + 6, i + frameLength - 1)
});
......@@ -457,6 +479,62 @@ AacStream = function() {
AacStream.prototype = new videojs.Hls.Stream();
/**
 * Constructs a single-track, ISO BMFF media segment from AAC data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} track metadata (channelcount, samplerate, etc)
 * that the generated moof will describe
 */
// TODO: share common code with VideoSegmentStream
AudioSegmentStream = function(track) {
  var aacFrames = [], aacFramesLength = 0, sequenceNumber = 0;
  AudioSegmentStream.prototype.init.call(this);

  // buffer audio data until end() is called
  this.push = function(data) {
    aacFrames.push(data);
    aacFramesLength += data.data.byteLength;
  };

  // emit a moof/mdat pair covering everything buffered so far
  this.end = function() {
    var boxes, currentFrame, data, sample, i, mdat, moof;

    // return early if no audio data has been observed
    if (aacFramesLength === 0) {
      return;
    }

    // concatenate the audio data to construct the mdat, building the
    // sample table as we go
    data = new Uint8Array(aacFramesLength);
    track.samples = [];
    // fix: the write offset was never initialized, so data.set() was
    // called with an undefined offset
    i = 0;
    // fix: the loop condition was `aacFramesLength.length`, which is
    // undefined on a number, so the mdat payload was never copied
    while (aacFrames.length) {
      currentFrame = aacFrames[0];
      sample = {
        size: currentFrame.data.byteLength,
        duration: 1024 // FIXME calculate for realz
      };
      track.samples.push(sample);
      data.set(currentFrame.data, i);
      i += currentFrame.data.byteLength;
      aacFrames.shift();
    }
    aacFramesLength = 0;
    mdat = mp4.mdat(data);
    moof = mp4.moof(sequenceNumber, [track]);
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);
    this.trigger('data', boxes);
  };
};
AudioSegmentStream.prototype = new videojs.Hls.Stream();
/**
* Accepts a NAL unit byte stream and unpacks the embedded NAL units.
*/
NalByteStream = function() {
......@@ -539,7 +617,7 @@ NalByteStream = function() {
this.end = function() {
// deliver the last buffered NAL unit
if (buffer.byteLength > 3) {
if (buffer && buffer.byteLength > 3) {
this.trigger('data', buffer.subarray(syncPoint + 3));
}
};
......@@ -763,12 +841,19 @@ VideoSegmentStream = function(track) {
this.end = function() {
var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample;
// return early if no video data has been observed
if (nalUnitsLength === 0) {
return;
}
// concatenate the video data and construct the mdat
// first, we have to build the index from byte locations to
// samples (that is, frames) in the video data
data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length));
view = new DataView(data.buffer);
track.samples = [];
// see ISO/IEC 14496-12:2012, section 8.6.4.3
sample = {
size: 0,
flags: {
......@@ -853,11 +938,14 @@ VideoSegmentStream.prototype = new videojs.Hls.Stream();
Transmuxer = function() {
var
self = this,
track,
videoTrack,
audioTrack,
config,
pps,
packetStream, parseStream, elementaryStream, aacStream, h264Stream, videoSegmentStream;
packetStream, parseStream, elementaryStream,
aacStream, h264Stream,
videoSegmentStream, audioSegmentStream;
Transmuxer.prototype.init.call(this);
......@@ -880,51 +968,78 @@ Transmuxer = function() {
!config) {
config = data.config;
track.width = config.width;
track.height = config.height;
track.sps = [data.data];
track.profileIdc = config.profileIdc;
track.levelIdc = config.levelIdc;
track.profileCompatibility = config.profileCompatibility;
videoTrack.width = config.width;
videoTrack.height = config.height;
videoTrack.sps = [data.data];
videoTrack.profileIdc = config.profileIdc;
videoTrack.levelIdc = config.levelIdc;
videoTrack.profileCompatibility = config.profileCompatibility;
// generate an init segment once all the metadata is available
if (pps) {
self.trigger('data', {
data: videojs.mp4.initSegment([track])
type: 'video',
data: videojs.mp4.initSegment([videoTrack])
});
}
}
if (data.nalUnitType === 'pic_parameter_set_rbsp' &&
!pps) {
pps = data.data;
track.pps = [data.data];
videoTrack.pps = [data.data];
if (config) {
self.trigger('data', {
data: videojs.mp4.initSegment([track])
type: 'video',
data: videojs.mp4.initSegment([videoTrack])
});
}
}
});
// hook up the video segment stream once track metadata is delivered
elementaryStream.on('data', function(data) {
var i, triggerData = function(segment) {
// generate an init segment based on the first audio sample
aacStream.on('data', function(data) {
if (audioTrack && audioTrack.channelcount === undefined) {
audioTrack.channelcount = data.channelcount;
audioTrack.samplerate = data.samplerate;
audioTrack.samplesize = data.samplesize;
self.trigger('data', {
data: segment
type: 'audio',
data: videojs.mp4.initSegment([audioTrack])
});
}
});
// hook up the segment streams once track metadata is delivered
elementaryStream.on('data', function(data) {
var i, triggerData = function(type) {
return function(segment) {
self.trigger('data', {
type: type,
data: segment
});
};
};
if (data.type === 'metadata') {
i = data.tracks.length;
// scan the tracks listed in the metadata
while (i--) {
if (data.tracks[i].type === 'video') {
track = data.tracks[i];
if (!videoSegmentStream) {
videoSegmentStream = new VideoSegmentStream(track);
h264Stream.pipe(videoSegmentStream);
videoSegmentStream.on('data', triggerData);
}
// hook up the video segment stream to the first track with h264 data
if (data.tracks[i].type === 'video' && !videoSegmentStream) {
videoTrack = data.tracks[i];
videoSegmentStream = new VideoSegmentStream(videoTrack);
h264Stream.pipe(videoSegmentStream);
videoSegmentStream.on('data', triggerData('video'));
break;
}
// hook up the audio segment stream to the first track with aac data
if (data.tracks[i].type === 'audio' && !audioSegmentStream) {
audioTrack = data.tracks[i];
audioSegmentStream = new AudioSegmentStream(audioTrack);
aacStream.pipe(audioSegmentStream);
audioSegmentStream.on('data', triggerData('audio'));
}
}
}
});
......@@ -938,6 +1053,7 @@ Transmuxer = function() {
elementaryStream.end();
h264Stream.end();
videoSegmentStream.end();
audioSegmentStream.end();
};
};
Transmuxer.prototype = new videojs.Hls.Stream();
......
......@@ -586,6 +586,75 @@ test('can parse a video stsd', function() {
}]);
});
// Exercises the mp4a and esds parsing paths of the inspector with a
// hand-built audio sample description. The nested box sizes (91/75/39)
// follow from the listed payload bytes plus 8-byte box headers.
test('can parse an audio stsd', function() {
  var data = box('stsd',
                 0x00, // version 0
                 0x00, 0x00, 0x00, // flags
                 0x00, 0x00, 0x00, 0x01, // entry_count
                 box('mp4a',
                     0x00, 0x00, 0x00,
                     0x00, 0x00, 0x00, // reserved
                     0x00, 0x01, // data_reference_index
                     0x00, 0x00, 0x00, 0x00,
                     0x00, 0x00, 0x00, 0x00, // reserved
                     0x00, 0x02, // channelcount
                     0x00, 0x10, // samplesize
                     0x00, 0x00, // pre_defined
                     0x00, 0x00, // reserved
                     0xbb, 0x80, 0x00, 0x00, // samplerate, fixed-point 16.16
                     box('esds',
                         0x00, // version 0
                         0x00, 0x00, 0x00, // flags
                         0x03, // tag, ES_DescrTag
                         0x00, // length
                         0x00, 0x01, // ES_ID
                         0x00, // streamDependenceFlag, URL_Flag, reserved, streamPriority
                         // DecoderConfigDescriptor
                         0x04, // tag, DecoderConfigDescrTag
                         0x0d, // length
                         0x40, // objectProfileIndication, AAC Main
                         0x15, // streamType, AudioStream. upstream, reserved
                         0x00, 0x00, 0xff, // bufferSizeDB
                         0x00, 0x00, 0x00, 0xff, // maxBitrate
                         0x00, 0x00, 0x00, 0xaa, // avgBitrate
                         // DecoderSpecificInfo
                         0x05, // tag, DecoderSpecificInfoTag
                         0x02, // length
                         0x11, 0x90, 0x06, 0x01, 0x02))); // decoder specific info

  deepEqual(videojs.inspectMp4(new Uint8Array(data)), [{
    version: 0,
    flags: new Uint8Array([0, 0, 0]),
    type: 'stsd',
    size: 91,
    sampleDescriptions: [{
      type: 'mp4a',
      dataReferenceIndex: 1,
      channelcount: 2,
      samplesize: 16,
      samplerate: 48000, // 0xbb80 in the 16.16 fixed-point field above
      size: 75,
      streamDescriptor: {
        type: 'esds',
        version: 0,
        size: 39,
        flags: new Uint8Array([0, 0, 0]),
        esId: 1,
        streamPriority: 0,
        decoderConfig: {
          objectProfileIndication: 0x40,
          streamType: 0x05,
          bufferSize: 0xff,
          maxBitrate: 0xff,
          avgBitrate: 0xaa
        }
      }
    }]
  }], 'parsed an audio stsd');
});
test('can parse an styp', function() {
deepEqual(videojs.inspectMp4(new Uint8Array(box('styp',
0x61, 0x76, 0x63, 0x31, // major brand
......@@ -845,6 +914,24 @@ test('can parse a sidx', function(){
}]);
});
// Verifies that the inspector decodes the sound media header, including
// the 8.8 fixed-point balance field.
test('can parse an smhd', function() {
  var data = box('smhd',
                 0x00, // version
                 0x00, 0x00, 0x00, // flags
                 0x00, 0xff, // balance, fixed-point 8.8
                 0x00, 0x00); // reserved

  deepEqual(videojs.inspectMp4(new Uint8Array(data)),
            [{
              type: 'smhd',
              size: 16, // 8-byte box header + 8 payload bytes
              version: 0,
              flags: new Uint8Array([0, 0, 0]),
              balance: 0xff / Math.pow(2, 8)
            }],
            'parsed an smhd');
});
test('can parse a tfdt', function() {
var data = box('tfdt',
0x00, // version
......
......@@ -129,6 +129,27 @@ var
avgBitrate: view.getUint32(8)
};
},
esds: function(data) {
return {
version: data[0],
flags: new Uint8Array(data.subarray(1, 4)),
esId: (data[6] << 8) | data[7],
streamPriority: data[8] & 0x1f,
decoderConfig: {
objectProfileIndication: data[11],
streamType: (data[12] >>> 2) & 0x3f,
bufferSize: (data[13] << 16) | (data[14] << 8) | data[15],
maxBitrate: (data[16] << 24) |
(data[17] << 16) |
(data[18] << 8) |
data[19],
avgBitrate: (data[20] << 24) |
(data[21] << 16) |
(data[22] << 8) |
data[23]
}
};
},
ftyp: function(data) {
var
view = new DataView(data.buffer, data.byteOffset, data.byteLength),
......@@ -247,6 +268,30 @@ var
boxes: videojs.inspectMp4(data)
};
},
// codingname, not a first-class box type. stsd entries share the
// same format as real boxes so the parsing infrastructure can be
// shared
mp4a: function(data) {
var
view = new DataView(data.buffer, data.byteOffset, data.byteLength),
result = {
// 6 bytes reserved
dataReferenceIndex: view.getUint16(6),
// 4 + 4 bytes reserved
channelcount: view.getUint16(16),
samplesize: view.getUint16(18),
// 2 bytes pre_defined
// 2 bytes reserved
samplerate: view.getUint16(24) + (view.getUint16(26) / 65536)
};
// if there are more bytes to process, assume this is an ISO/IEC
// 14496-14 MP4AudioSampleEntry and parse the ESDBox
if (data.byteLength > 28) {
result.streamDescriptor = videojs.inspectMp4(data.subarray(28))[0];
}
return result;
},
moof: function(data) {
return {
boxes: videojs.inspectMp4(data)
......@@ -357,6 +402,13 @@ var
return result;
},
// Parse a sound media header box, which carries the stereo balance for
// an audio track.
smhd: function(data) {
  return {
    version: data[0],
    flags: new Uint8Array(data.subarray(1, 4)),
    // NOTE(review): balance is a signed 8.8 fixed-point value per
    // ISO/IEC 14496-12, but this reads the integer part as unsigned --
    // confirm left-weighted (negative) balances are not required
    balance: data[4] + (data[5] / 256)
  };
},
stbl: function(data) {
return {
boxes: videojs.inspectMp4(data)
......
......@@ -181,8 +181,8 @@
mediaSource.addEventListener('sourceopen', function() {
var
buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
one = false;
// buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d');
buffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2');
buffer.addEventListener('updatestart', logevent);
buffer.addEventListener('updateend', logevent);
buffer.addEventListener('error', logevent);
......@@ -211,27 +211,43 @@
var segment = new Uint8Array(reader.result),
transmuxer = new videojs.mp2t.Transmuxer(),
events = [],
i = 0,
bytesLength = 0,
init = false,
bytes,
hex = '';
transmuxer.on('data', function(data) {
if (data) {
if (data && data.type === 'audio') {
events.push(data.data);
bytesLength += data.data.byteLength;
// XXX Media Sources Testing
if (!init) {
vjsParsed = videojs.inspectMp4(data.data);
console.log('appended tmuxed output');
window.vjsSourceBuffer.appendBuffer(data.data);
init = true;
}
}
});
transmuxer.push(segment);
transmuxer.end();
bytes = new Uint8Array(events[0].byteLength + events[1].byteLength);
bytes.set(events[0]);
bytes.set(events[1], events[0].byteLength);
bytes = new Uint8Array(bytesLength);
i = 0;
while (events.length) {
bytes.set(events[0], i);
i += events[0].byteLength;
events.shift();
}
vjsParsed = videojs.inspectMp4(bytes);
console.log('transmuxed', vjsParsed);
// vjsParsed = videojs.inspectMp4(bytes);
console.log('transmuxed', videojs.inspectMp4(bytes));
diffParsed();
// clear old box info
vjsBoxes.innerHTML = stringify(vjsParsed, null, ' ');
vjsBoxes.innerHTML = stringify(videojs.inspectMp4(bytes), null, ' ');
// write out the result
hex += '<pre>';
......@@ -263,8 +279,7 @@
workingOutput.innerHTML = hex;
// XXX Media Sources Testing
window.vjsSourceBuffer.appendBuffer(bytes);
console.log('appended bytes');
// window.vjsSourceBuffer.appendBuffer(bytes);
});
reader.readAsArrayBuffer(this.files[0]);
}, false);
......
......@@ -76,27 +76,41 @@
// setup the media source
mediaSource.addEventListener('sourceopen', function() {
var buffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
var videoBuffer = mediaSource.addSourceBuffer('video/mp4;codecs=avc1.4d400d'),
audioBuffer = mediaSource.addSourceBuffer('audio/mp4;codecs=mp4a.40.2'),
transmuxer = new videojs.mp2t.Transmuxer(),
segments = [];
videoSegments = [],
audioSegments = [];
// expose the machinery for debugging
window.vjsMediaSource = mediaSource;
window.vjsSourceBuffer = buffer;
window.vjsSourceBuffer = videoBuffer;
window.vjsVideo = demo;
// transmux the MPEG-TS data to BMFF segments
transmuxer.on('data', function(segment) {
segments.push(segment);
if (segment.type === 'video') {
videoSegments.push(segment);
} else {
audioSegments.push(segment);
}
});
transmuxer.push(hazeVideo);
transmuxer.end();
// buffer up the video data
buffer.appendBuffer(segments.shift().data);
buffer.addEventListener('updateend', function() {
if (segments.length) {
buffer.appendBuffer(segments.shift().data);
videoBuffer.appendBuffer(videoSegments.shift().data);
videoBuffer.addEventListener('updateend', function() {
if (videoSegments.length) {
videoBuffer.appendBuffer(videoSegments.shift().data);
}
});
// buffer up the audio data
audioBuffer.appendBuffer(audioSegments.shift().data);
audioBuffer.addEventListener('updateend', function() {
if (audioSegments.length) {
audioBuffer.appendBuffer(audioSegments.shift().data);
}
});
});
......
......@@ -94,8 +94,10 @@
// Once the MediaSource is open, attach one SourceBuffer per elementary
// stream. The raw diff capture kept the old single-buffer lines next to
// the new video/audio-buffer lines; only the new version is kept here.
var onMediaSourceOpen = function() {
  console.log('on media open');
  ms.removeEventListener('sourceopen', onMediaSourceOpen);
  var videoBuffer = ms.addSourceBuffer('video/mp4;codecs="avc1.4D400D"');
  videoBuffer.appendBuffer(bytes);
  var audioBuffer = ms.addSourceBuffer('audio/mp4;codecs=mp4a.40.2');
};
ms.addEventListener('sourceopen', onMediaSourceOpen);
......
......@@ -47,7 +47,9 @@ var
validateTrack,
validateTrackFragment,
videoPes;
transportPacket,
videoPes,
audioPes;
module('MP2T Packet Stream', {
setup: function() {
......@@ -397,15 +399,22 @@ test('parses an elementary stream packet with a pts and dts', function() {
equal(2 / 90, packet.dts, 'parsed the dts');
});
// helper function to create video PES packets
videoPes = function(data, first) {
/**
* Helper function to create transport stream PES packets
* @param pid {uint8} - the program identifier (PID)
* @param data {arraylike} - the payload bytes
* @payload first {boolean} - true if this PES should be a payload
* unit start
*/
transportPacket = function(pid, data, first) {
var
adaptationFieldLength = 188 - data.length - (first ? 18 : 17),
adaptationFieldLength = 188 - data.length - (first ? 15 : 14),
// transport_packet(), Rec. ITU-T H.222.0, Table 2-2
result = [
// sync byte
0x47,
// tei:0 pusi:1 tp:0 pid:0 0000 0001 0001
0x40, 0x11,
0x40, pid,
// tsc:01 afc:11 cc:0000
0x70
].concat([
......@@ -422,6 +431,7 @@ videoPes = function(data, first) {
result.push(0xff);
}
// PES_packet(), Rec. ITU-T H.222.0, Table 2-21
result = result.concat([
// pscp:0000 0000 0000 0000 0000 0001
0x00, 0x00, 0x01,
......@@ -437,14 +447,41 @@ videoPes = function(data, first) {
if (first) {
result.push(0x00);
}
result = result.concat([
return result.concat(data);
};
/**
* Helper function to create video PES packets
* @param data {arraylike} - the payload bytes
* @payload first {boolean} - true if this PES should be a payload
* unit start
*/
// Build a video PES transport packet on PID 0x11. The raw diff capture
// left the old `].concat(data));` / `return result;` lines interleaved
// with the new transportPacket-based body; only the new version is kept.
videoPes = function(data, first) {
  return transportPacket(0x11, [
    // NAL unit start code
    0x00, 0x00, 0x01
  ].concat(data), first);
};
standalonePes = videoPes([0xaf, 0x01], true);
/**
 * Helper function to create audio PES packets carrying one ADTS frame.
 * @param data {arraylike} - the AAC payload bytes
 * @payload first {boolean} - true if this PES should be a payload
 * unit start
 */
audioPes = function(data, first) {
  var adtsFrameLength = data.length + 7,
      adtsHeader = [
        0xff, 0xf1, // no CRC
        0x10, // AAC Main, 44.1KHz
        0xb0 | ((adtsFrameLength & 0x1800) >> 11), // 2 channels
        (adtsFrameLength & 0x7f8) >> 3,
        ((adtsFrameLength & 0x07) << 5) + 7, // frame length in bytes
        0x00 // one AAC per ADTS frame
      ];
  return transportPacket(0x12, adtsHeader.concat(data), first);
};
test('parses an elementary stream packet without a pts or dts', function() {
var packet;
......@@ -950,17 +987,24 @@ test('generates AAC frame events from ADTS bytes', function() {
aacStream.push({
type: 'audio',
data: new Uint8Array([
0xff, 0xf1, // no CRC
0x00, // AAC Main, 44.1KHz
0xfc, 0x01, 0x20, // frame length 9 bytes
0x00, // one AAC per ADTS frame
0x12, 0x34, // AAC payload
0x56, 0x78 // extra junk that should be ignored
0xff, 0xf1, // no CRC
0x10, // AAC Main, 44.1KHz
0xbc, 0x01, 0x20, // 2 channels, frame length 9 bytes
0x00, // one AAC per ADTS frame
0x12, 0x34, // AAC payload
0x56, 0x78 // extra junk that should be ignored
])
});
equal(frames.length, 1, 'generated one frame');
deepEqual(frames[0].data, new Uint8Array([0x12, 0x34]), 'extracted AAC frame');
equal(frames[0].channelcount, 2, 'parsed channelcount');
equal(frames[0].samplerate, 44100, 'parsed samplerate');
// Chrome only supports 8, 16, and 32 bit sample sizes. Assuming the
// default value of 16 in ISO/IEC 14496-12 AudioSampleEntry is
// acceptable.
equal(frames[0].samplesize, 16, 'parsed samplesize');
});
// not handled: ADTS with CRC
......@@ -972,7 +1016,7 @@ module('Transmuxer', {
}
});
test('generates an init segment', function() {
test('generates a video init segment', function() {
var segments = [];
transmuxer.on('data', function(segment) {
segments.push(segment);
......@@ -980,16 +1024,38 @@ test('generates an init segment', function() {
transmuxer.push(packetize(PAT));
transmuxer.push(packetize(PMT));
transmuxer.push(packetize(videoPes([
0x07,
0x08, 0x01 // pic_parameter_set_rbsp
], true)));
transmuxer.push(packetize(videoPes([
0x07, // seq_parameter_set_rbsp
0x27, 0x42, 0xe0, 0x0b,
0xa9, 0x18, 0x60, 0x9d,
0x80, 0x53, 0x06, 0x01,
0x06, 0xb6, 0xc2, 0xb5,
0xef, 0x7c, 0x04
], false)));
transmuxer.end();
equal(segments.length, 2, 'generated init and media segments');
ok(segments[0].data, 'wrote data in the init segment');
equal(segments[0].type, 'video', 'video is the segment type');
});
// Feeding a single ADTS frame through the transmuxer should produce an
// audio init segment followed by a media segment. The raw diff capture
// left the stale `segments.length, 1` assertion next to the new one;
// only the new assertions are kept.
test('generates an audio init segment', function() {
  var segments = [];
  transmuxer.on('data', function(segment) {
    segments.push(segment);
  });
  transmuxer.push(packetize(PAT));
  transmuxer.push(packetize(PMT));
  transmuxer.push(packetize(audioPes([
    0x00, 0x01
  ], true)));
  transmuxer.end();

  equal(segments.length, 2, 'generated init and media segments');
  ok(segments[0].data, 'wrote data in the init segment');
  equal(segments[0].type, 'audio', 'audio is the segment type');
});
test('buffers video samples until ended', function() {
......@@ -1123,20 +1189,26 @@ validateTrackFragment = function(track, segment, metadata) {
test('parses an example mp2t file and generates media segments', function() {
var
segments = [],
videoSegments = [],
audioSegments = [],
sequenceNumber = window.Infinity,
i, boxes, mfhd;
transmuxer.on('data', function(segment) {
segments.push(segment);
if (segment.type === 'video') {
videoSegments.push(segment);
} else if (segment.type === 'audio') {
audioSegments.push(segment);
}
});
transmuxer.push(window.bcSegment);
transmuxer.end();
equal(segments.length, 2, 'generated two segments');
equal(videoSegments.length, 2, 'generated two video segments');
equal(audioSegments.length, 2, 'generated two audio segments');
boxes = videojs.inspectMp4(segments[0].data);
equal(boxes.length, 2, 'init segments are composed of two boxes');
boxes = videojs.inspectMp4(videoSegments[0].data);
equal(boxes.length, 2, 'video init segments are composed of two boxes');
equal(boxes[0].type, 'ftyp', 'the first box is an ftyp');
equal(boxes[1].type, 'moov', 'the second box is a moov');
equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd');
......@@ -1150,9 +1222,9 @@ test('parses an example mp2t file and generates media segments', function() {
// });
// equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex');
boxes = videojs.inspectMp4(segments[1].data);
ok(boxes.length > 0, 'media segments are not empty');
ok(boxes.length % 2 === 0, 'media segments are composed of pairs of boxes');
boxes = videojs.inspectMp4(videoSegments[1].data);
ok(boxes.length > 0, 'video media segments are not empty');
ok(boxes.length % 2 === 0, 'video media segments are composed of pairs of boxes');
for (i = 0; i < boxes.length; i += 2) {
equal(boxes[i].type, 'moof', 'first box is a moof');
equal(boxes[i].boxes.length, 2, 'the moof has two children');
......@@ -1163,7 +1235,7 @@ test('parses an example mp2t file and generates media segments', function() {
sequenceNumber = mfhd.sequenceNumber;
equal(boxes[i + 1].type, 'mdat', 'second box is an mdat');
validateTrackFragment(boxes[i].boxes[1], segments[1].data, {
validateTrackFragment(boxes[i].boxes[1], videoSegments[1].data, {
trackId: 256,
width: 388,
height: 300,
......