85ee7214 by David LaPalomento

Get init segments working with real metadata

Parse and pass along additional track info from the sequence parameter set.
1 parent 0816934b
(function(window, videojs, undefined) {
'use strict';
// Module-scoped names. Everything is declared once here and assigned further
// down, so each identifier appears in exactly one `var` statement.
var
  // functions and lookup tables
  box, dinf, ftyp, hdlr, mdat, mdhd, mdia, mfhd, minf, moof, moov, mvex,
  mvhd, stbl, stsd, styp, tkhd, trak, trex, types,
  // constants
  MAJOR_BRAND, MINOR_VERSION, AVC1_BRAND, VIDEO_HDLR, AUDIO_HDLR, HDLR_TYPES,
  VMHD, DREF, STCO, STSC, STSZ, STTS, TREX,
  // typed-array constructors, aliased from `window` below
  Uint8Array, DataView;
// Take the typed-array constructors from the `window` object passed into this
// module's wrapper function rather than from the ambient globals.
Uint8Array = window.Uint8Array;
DataView = window.DataView;
......@@ -60,6 +62,12 @@ DataView = window.DataView;
'o'.charCodeAt(0),
'm'.charCodeAt(0)
]);
// The four ASCII bytes of the string 'avc1'.
AVC1_BRAND = new Uint8Array('avc1'.split('').map(function(letter) {
  return letter.charCodeAt(0);
}));
// Four bytes: 0, 0, 0, 1.
MINOR_VERSION = new Uint8Array([0x00, 0x00, 0x00, 0x01]);
VIDEO_HDLR = new Uint8Array([
0x00, // version 0
......@@ -98,15 +106,6 @@ DataView = window.DataView;
0x00, // version 0
0x00, 0x00, 0x01 // entry_flags
]);
// Fixed payload for a 'trex' box. The bytes are written verbatim, so the
// track_ID field is always 1 no matter which track the box is emitted for.
TREX = new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x01, // track_ID
0x00, 0x00, 0x00, 0x01, // default_sample_description_index
0x00, 0x00, 0x00, 0x00, // default_sample_duration
0x00, 0x00, 0x00, 0x00, // default_sample_size
0x00, 0x01, 0x00, 0x01 // default_sample_flags
]);
STCO = new Uint8Array([
0x00, // version
0x00, 0x00, 0x00, // flags
......@@ -160,7 +159,7 @@ dinf = function() {
};
/**
 * Builds the file type box.
 * @return the 'ftyp' box produced by box(): MAJOR_BRAND, MINOR_VERSION,
 * then MAJOR_BRAND and AVC1_BRAND as the trailing brand entries.
 */
ftyp = function() {
  // A single return: an earlier return without AVC1_BRAND used to precede
  // this one and made it unreachable.
  return box(types.ftyp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND, AVC1_BRAND);
};
hdlr = function(type) {
......@@ -185,8 +184,8 @@ mdhd = function(duration) {
0x00, 0x00
]));
};
/**
 * Builds the media box for one track.
 * @param track {object} track description; `duration` and `type` are read
 * here and the whole object is handed on to minf()
 * @return the 'mdia' box produced by box(), wrapping the mdhd, hdlr and
 * minf boxes in that order
 */
mdia = function(track) {
  return box(types.mdia, mdhd(track.duration), hdlr(track.type), minf(track));
};
mfhd = function(sequenceNumber) {
return box(types.mfhd, new Uint8Array([
......@@ -198,8 +197,8 @@ mfhd = function(sequenceNumber) {
sequenceNumber & 0xFF, // sequence_number
]));
};
/**
 * Builds the media information box for one track.
 * @param track {object} track description, passed through untouched to stbl()
 * @return the 'minf' box produced by box(), wrapping a vmhd box built from
 * VMHD, the dinf box and the stbl box in that order
 */
minf = function(track) {
  return box(types.minf, box(types.vmhd, VMHD), dinf(), stbl(track));
};
moof = function(sequenceNumber, tracks) {
var
......@@ -233,10 +232,17 @@ moov = function(tracks) {
boxes[i] = trak(tracks[i]);
}
return box.apply(null, [types.moov, mvhd(0xffffffff)].concat(boxes).concat(mvex()));
return box.apply(null, [types.moov, mvhd(0xffffffff)].concat(boxes).concat(mvex(tracks)));
};
/**
 * Builds the movie extends box, with one trex child per track.
 * @param tracks {array} the track descriptions; each one is passed to trex()
 * @return the 'mvex' box produced by box(), with the trex boxes in the same
 * order as `tracks`
 */
mvex = function(tracks) {
  var
    children = [types.mvex],
    i;

  for (i = 0; i < tracks.length; i++) {
    children.push(trex(tracks[i]));
  }
  // box() takes the type followed by any number of payloads
  return box.apply(null, children);
};
mvhd = function(duration) {
var
......@@ -270,21 +276,41 @@ mvhd = function(duration) {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, // pre_defined
0x00, 0x00, 0x00, 0x01 // next_track_ID
0xff, 0xff, 0xff, 0xff // next_track_ID
]);
return box(types.mvhd, bytes);
};
/**
 * Builds the sample table box for one track.
 * @param track {object} track description, passed through untouched to stsd()
 * @return the 'stbl' box produced by box(), wrapping the stsd box followed by
 * stts, stsc, stsz and stco boxes built from the STTS, STSC, STSZ and STCO
 * constants
 */
stbl = function(track) {
  return box(types.stbl,
             stsd(track),
             box(types.stts, STTS),
             box(types.stsc, STSC),
             box(types.stsz, STSZ),
             box(types.stco, STCO));
};
stsd = function(width, height) {
stsd = function(track) {
var sequenceParameterSets = [], pictureParameterSets = [], i;
if (track.type === 'audio') {
return box(types.stsd);
}
// assemble the SPSs
for (i = 0; i < track.sps.length; i++) {
sequenceParameterSets.push((track.sps[i].byteLength & 0xFF00) >>> 8);
sequenceParameterSets.push((track.sps[i].byteLength & 0xFF)); // sequenceParameterSetLength
sequenceParameterSets = sequenceParameterSets.concat(Array.prototype.slice.call(track.sps[i])); // SPS
}
// assemble the PPSs
for (i = 0; i < track.pps.length; i++) {
pictureParameterSets.push((track.pps[i].byteLength & 0xFF00) >>> 8);
pictureParameterSets.push((track.pps[i].byteLength & 0xFF));
pictureParameterSets = pictureParameterSets.concat(Array.prototype.slice.call(track.pps[i]));
}
return box(types.stsd, new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
......@@ -298,10 +324,10 @@ stsd = function(width, height) {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, // pre_defined
(width & 0xff00) >> 8,
width & 0xff, // width
(height & 0xff00) >> 8,
height & 0xff, // height
(track.width & 0xff00) >> 8,
track.width & 0xff, // width
(track.height & 0xff00) >> 8,
track.height & 0xff, // height
0x00, 0x48, 0x00, 0x00, // horizresolution
0x00, 0x48, 0x00, 0x00, // vertresolution
0x00, 0x00, 0x00, 0x00, // reserved
......@@ -319,18 +345,15 @@ stsd = function(width, height) {
0x11, 0x11]), // pre_defined = -1
box(types.avcC, new Uint8Array([
0x01, // configurationVersion
0x4d, // AVCProfileIndication??
0x40, // profile_compatibility
0x20, // AVCLevelIndication
0xff, // lengthSizeMinusOne
0xe1, // numOfSequenceParameterSets
0x00, 0x0c, // sequenceParameterSetLength
0x67, 0x4d, 0x40, 0x20,
0x96, 0x52, 0x80, 0xa0,
0x0b, 0x76, 0x02, 0x05, // SPS
0x01, // numOfPictureParameterSets
0x00, 0x04, // pictureParameterSetLength
0x68, 0xef, 0x38, 0x80])), // "PPS"
track.profileIdc, // AVCProfileIndication
track.profileCompatibility, // profile_compatibility
track.levelIdc, // AVCLevelIndication
0xff // lengthSizeMinusOne
].concat([
track.sps.length // numOfSequenceParameterSets
]).concat(sequenceParameterSets).concat([
track.pps.length // numOfPictureParameterSets
]).concat(pictureParameterSets))), // "PPS"
box(types.btrt, new Uint8Array([
0x00, 0x1c, 0x9c, 0x80, // bufferSizeDB
0x00, 0x2d, 0xc6, 0xc0, // maxBitrate
......@@ -390,7 +413,22 @@ trak = function(track) {
track.duration = track.duration || 0xffffffff;
return box(types.trak,
tkhd(track),
mdia(track.duration, track.width, track.height, track.type));
mdia(track));
};
/**
 * Builds a track extends box for one track.
 * @param track {object} track description; only `id` is read
 * @return the 'trex' box produced by box(), carrying the track's id as a
 * 32-bit big-endian track_ID followed by fixed default sample values
 */
trex = function(track) {
  var
    id = track.id,
    payload = new Uint8Array([
      0x00, // version 0
      0x00, 0x00, 0x00, // flags
      (id >>> 24) & 0xFF,
      (id >>> 16) & 0xFF,
      (id >>> 8) & 0xFF,
      id & 0xFF, // track_ID
      0x00, 0x00, 0x00, 0x01, // default_sample_description_index
      0x00, 0x00, 0x00, 0x00, // default_sample_duration
      0x00, 0x00, 0x00, 0x00, // default_sample_size
      0x00, 0x01, 0x00, 0x01 // default_sample_flags
    ]);

  return box(types.trex, payload);
};
window.videojs.mp4 = {
......
......@@ -497,7 +497,10 @@ H264Stream = function() {
case 0x07:
event.nalUnitType = 'seq_parameter_set_rbsp';
event.dimensions = readSequenceParameterSet(data.subarray(1));
event.config = readSequenceParameterSet(data.subarray(1));
break;
case 0x08:
event.nalUnitType = 'pic_parameter_set_rbsp';
break;
default:
......@@ -541,8 +544,9 @@ H264Stream = function() {
* properties. A sequence parameter set is the H264 metadata that
* describes the properties of upcoming video frames.
* @param data {Uint8Array} the bytes of a sequence parameter set
* @return {object} an object with configuration parsed from the
* sequence parameter set: profileIdc, levelIdc, profileCompatibility,
* and the width and height of the associated video frames.
*/
readSequenceParameterSet = function(data) {
var
......@@ -550,16 +554,19 @@ H264Stream = function() {
frameCropRightOffset = 0,
frameCropTopOffset = 0,
frameCropBottomOffset = 0,
expGolombDecoder, profileIdc, chromaFormatIdc, picOrderCntType,
expGolombDecoder, profileIdc, levelIdc, profileCompatibility,
chromaFormatIdc, picOrderCntType,
numRefFramesInPicOrderCntCycle, picWidthInMbsMinus1,
picHeightInMapUnitsMinus1, frameMbsOnlyFlag,
picHeightInMapUnitsMinus1,
frameMbsOnlyFlag,
scalingListCount,
i;
expGolombDecoder = new videojs.Hls.ExpGolomb(data);
profileIdc = expGolombDecoder.readUnsignedByte(); // profile_idc
// constraint_set[0-5]_flag, u(1), reserved_zero_2bits u(2), level_idc u(8)
expGolombDecoder.skipBits(16);
profileCompatibility = expGolombDecoder.readBits(5); // constraint_set[0-5]_flag
expGolombDecoder.skipBits(3); // u(1), reserved_zero_2bits u(2)
levelIdc = expGolombDecoder.readUnsignedByte(); // level_idc u(8)
expGolombDecoder.skipUnsignedExpGolomb(); // seq_parameter_set_id
// some profiles have more optional data we don't need
......@@ -628,6 +635,9 @@ H264Stream = function() {
}
return {
profileIdc: profileIdc,
levelIdc: levelIdc,
profileCompatibility: profileCompatibility,
width: ((picWidthInMbsMinus1 + 1) * 16) - frameCropLeftOffset * 2 - frameCropRightOffset * 2,
height: ((2 - frameMbsOnlyFlag) * (picHeightInMapUnitsMinus1 + 1) * 16) - (frameCropTopOffset * 2) - (frameCropBottomOffset * 2)
};
......@@ -644,7 +654,8 @@ Transmuxer = function() {
videoSamples = [],
videoSamplesSize = 0,
tracks,
dimensions,
config,
pps,
packetStream, parseStream, programStream, aacStream, h264Stream,
......@@ -673,21 +684,43 @@ Transmuxer = function() {
videoSamples.length) {
//flushVideo();
}
// generate an init segment once all the metadata is available
// record the track config
if (data.nalUnitType === 'seq_parameter_set_rbsp' &&
!dimensions) {
dimensions = data.dimensions;
!config) {
config = data.config;
i = tracks.length;
while (i--) {
if (tracks[i].type === 'video') {
tracks[i].width = dimensions.width;
tracks[i].height = dimensions.height;
tracks[i].width = config.width;
tracks[i].height = config.height;
tracks[i].sps = [data.data];
tracks[i].profileIdc = config.profileIdc;
tracks[i].levelIdc = config.levelIdc;
tracks[i].profileCompatibility = config.profileCompatibility;
}
}
self.trigger('data', {
data: videojs.mp4.initSegment(tracks)
});
// generate an init segment once all the metadata is available
if (pps) {
self.trigger('data', {
data: videojs.mp4.initSegment(tracks)
});
}
}
if (data.nalUnitType === 'pic_parameter_set_rbsp' &&
!pps) {
pps = data.data;i = tracks.length;
while (i--) {
if (tracks[i].type === 'video') {
tracks[i].pps = [data.data];
}
}
if (config) {
self.trigger('data', {
data: videojs.mp4.initSegment(tracks)
});
}
}
// buffer video until we encounter a new access unit (aka the next frame)
......
......@@ -46,7 +46,12 @@ test('generates a moov', function() {
duration: 100,
width: 600,
height: 300,
type: 'video'
type: 'video',
profileIdc: 3,
levelIdc: 5,
profileCompatibility: 7,
sps: [new Uint8Array([0, 1, 2]), new Uint8Array([3, 4, 5])],
pps: [new Uint8Array([6, 7, 8])]
}]);
ok(data, 'box is not null');
......@@ -60,6 +65,7 @@ test('generates a moov', function() {
mvhd = boxes[0].boxes[0];
equal(mvhd.type, 'mvhd', 'generated a mvhd');
equal(mvhd.duration, 0xffffffff, 'wrote the maximum movie header duration');
equal(mvhd.nextTrackId, 0xffffffff, 'wrote the max next track id');
equal(boxes[0].boxes[1].type, 'trak', 'generated a trak');
equal(boxes[0].boxes[1].boxes.length, 2, 'generated two track sub boxes');
......@@ -119,15 +125,15 @@ test('generates a moov', function() {
equal(minf.boxes[2].type, 'stbl', 'generates an stbl type');
deepEqual({
type: 'stbl',
size: 233,
size: 228,
boxes: [{
type: 'stsd',
size: 157,
size: 152,
version: 0,
flags: new Uint8Array([0, 0, 0]),
sampleDescriptions: [{
type: 'avc1',
size: 141,
size: 136,
dataReferenceIndex: 1,
width: 600,
height: 300,
......@@ -137,19 +143,19 @@ test('generates a moov', function() {
depth: 24,
config: [{
type: 'avcC',
size: 35,
size: 30,
configurationVersion: 1,
avcProfileIndication: 0x4d,
profileCompatibility: 0x40,
avcLevelIndication: 0x20,
avcProfileIndication: 3,
avcLevelIndication: 5,
profileCompatibility: 7,
lengthSizeMinusOne: 3,
sps: [new Uint8Array([
0x67, 0x4d, 0x40, 0x20,
0x96, 0x52, 0x80, 0xa0,
0x0b, 0x76, 0x02, 0x05
0, 1, 2
]), new Uint8Array([
3, 4, 5
])],
pps: [new Uint8Array([
0x68, 0xef, 0x38, 0x80
6, 7, 8
])]
}, {
type: 'btrt',
......@@ -198,7 +204,7 @@ test('generates a moov', function() {
size: 32,
version: 0,
flags: new Uint8Array([0, 0, 0]),
trackId: 1,
trackId: 7,
defaultSampleDescriptionIndex: 1,
defaultSampleDuration: 0,
defaultSampleSize: 0,
......@@ -237,7 +243,9 @@ test('generates a video hdlr', function() {
duration: 100,
width: 600,
height: 300,
type: 'video'
type: 'video',
sps: [],
pps: []
}]);
ok(data, 'box is not null');
......@@ -256,7 +264,9 @@ test('generates an initialization segment', function() {
id: 1,
width: 600,
height: 300,
type: 'video'
type: 'video',
sps: [new Uint8Array([0])],
pps: [new Uint8Array([1])]
}, {
id: 2,
type: 'audio'
......
......@@ -750,6 +750,18 @@ test('parses nal unit types', function() {
h264Stream.end();
ok(data, 'generated a data event');
equal(data.nalUnitType, 'seq_parameter_set_rbsp', 'identified a sequence parameter set');
data = null;
h264Stream.push({
type: 'video',
data: new Uint8Array([
0x00, 0x00, 0x00, 0x01,
0x08, 0x01
])
});
h264Stream.end();
ok(data, 'generated a data event');
equal(data.nalUnitType, 'pic_parameter_set_rbsp', 'identified a picture parameter set');
});
module('Transmuxer', {
......@@ -775,7 +787,7 @@ test('generates an init segment', function() {
], true)));
transmuxer.end();
equal(segments.length, 2, 'has an init segment');
equal(segments.length, 1, 'has an init segment');
});
test('buffers video samples until ended', function() {
......