5cfc5611 by David LaPalomento

Generate init segment based on elementary streams

Capture the number of elementary streams in a transport stream program and map those to mp4 tracks in the init segment. Fix up some examples and update MSE documentation.
1 parent c080e98d
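As a rough sketch of the flow this commit introduces (the track fields shown are the ones the updated tests exercise; the PIDs come from the PMT test fixture):

```js
// one track object per elementary stream discovered in the PMT
var tracks = [
  { id: 0x11, codec: 'avc',  type: 'video' },
  { id: 0x12, codec: 'adts', type: 'audio' }
];
// ftyp + moov (one trak per track) as a single Uint8Array
var init = videojs.mp4.initSegment(tracks);
```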
......@@ -9,8 +9,18 @@ Chrome 36.
## ISO Base Media File Format (BMFF)
### Init Segment
A working initialization segment is outlined below. It may be possible
to trim this structure down further. A brief sketch of how these boxes
fit together follows the outline.
- `ftyp`
- `moov`
- `mvhd`
- `trak`
- `tkhd`
- `mdia`
- `mdhd`
- `hdlr`
- `minf`
- `mvex`
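For illustration, the generator changes in this patch produce this tree; inspecting the output (as the updated tests do) shows the nesting:

```js
var init = videojs.mp4.initSegment([{
  id: 1, duration: 0xffffffff, width: 1280, height: 720, type: 'video'
}]);
var boxes = videojs.inspectMp4(init);
// boxes[0].type === 'ftyp'
// boxes[1].type === 'moov'
// boxes[1].boxes: mvhd, trak (tkhd, mdia -> mdhd, hdlr, minf), mvex
```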
### Media Segment
......@@ -22,7 +32,7 @@ movie data is outlined below:
- `traf`
- `tfhd`
- `tfdt`
- `trun`
- `trun` containing samples
- `mdat`
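As a rough illustration (mirroring the `flushVideo` helper added to the transmuxer below, with placeholder names for the sample data), each media segment is a `moof`/`mdat` pair concatenated into a single buffer:

```js
var moof = videojs.mp4.moof(sequenceNumber, []);
var mdat = videojs.mp4.mdat(sampleBytes); // concatenated NAL units for one access unit
var segment = new Uint8Array(moof.byteLength + mdat.byteLength);
segment.set(moof);
segment.set(mdat, moof.byteLength);
```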
### Structure
......
......@@ -221,8 +221,19 @@ moof = function(sequenceNumber, tracks) {
mfhd(sequenceNumber),
box.apply(null, trafCall));
};
moov = function(duration, width, height, type) {
return box(types.moov, mvhd(duration), trak(duration, width, height, type), mvex());
/**
 * @param tracks {array} the tracks associated with this movie
 */
moov = function(tracks) {
var
i = tracks.length,
boxes = [];
while (i--) {
boxes[i] = trak(tracks[i]);
}
return box.apply(null, [types.moov, mvhd(0xffffffff)].concat(boxes).concat(mvex()));
};
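// example (illustrative, matching the updated unit tests): a single video
// track produces moov -> mvhd, trak, mvex:
//   moov([{ id: 7, duration: 100, width: 600, height: 300, type: 'video' }]);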
mvex = function() {
return box(types.mvex, box(types.trex, TREX));
......@@ -331,18 +342,21 @@ styp = function() {
return box(types.styp, MAJOR_BRAND, MINOR_VERSION, MAJOR_BRAND);
};
tkhd = function(duration, width, height) {
tkhd = function(track) {
return box(types.tkhd, new Uint8Array([
0x00, // version 0
0x00, 0x00, 0x00, // flags
0x00, 0x00, 0x00, 0x00, // creation_time
0x00, 0x00, 0x00, 0x00, // modification_time
0x00, 0x00, 0x00, 0x01, // track_ID
(track.id & 0xFF000000) >> 24,
(track.id & 0xFF0000) >> 16,
(track.id & 0xFF00) >> 8,
track.id & 0xFF, // track_ID
0x00, 0x00, 0x00, 0x00, // reserved
(duration & 0xFF000000) >> 24,
(duration & 0xFF0000) >> 16,
(duration & 0xFF00) >> 8,
duration & 0xFF, // duration
(track.duration & 0xFF000000) >> 24,
(track.duration & 0xFF0000) >> 16,
(track.duration & 0xFF00) >> 8,
track.duration & 0xFF, // duration
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, // reserved
0x00, 0x00, // layer
......@@ -358,17 +372,25 @@ tkhd = function(duration, width, height) {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x40, 0x00, 0x00, 0x00, // transformation: unity matrix
(width & 0xFF00) >> 8,
width & 0xFF,
(track.width & 0xFF00) >> 8,
track.width & 0xFF,
0x00, 0x00, // width
(height & 0xFF00) >> 8,
height & 0xFF,
(track.height & 0xFF00) >> 8,
track.height & 0xFF,
0x00, 0x00 // height
]));
};
trak = function(duration, width, height, type) {
return box(types.trak, tkhd(duration, width, height), mdia(duration, width, height, type));
/**
* Generate a track box.
* @param track {object} a track definition
* @return {Uint8Array} the track box
*/
trak = function(track) {
track.duration = track.duration || 0xffffffff;
return box(types.trak,
tkhd(track),
mdia(track.duration, track.width, track.height, track.type));
};
window.videojs.mp4 = {
......@@ -376,12 +398,13 @@ window.videojs.mp4 = {
mdat: mdat,
moof: moof,
moov: moov,
initSegment: function() {
initSegment: function(tracks) {
var
fileType = ftyp(),
movie = moov(0xffffffff, 1280, 720, "video"),
result = new Uint8Array(fileType.byteLength + movie.byteLength);
movie = moov(tracks),
result;
result = new Uint8Array(fileType.byteLength + movie.byteLength);
result.set(fileType);
result.set(movie, fileType.byteLength);
return result;
......
......@@ -124,10 +124,7 @@ ParseStream = function() {
* fields parsed from the PMT.
*/
parsePmt = function(payload, pmt) {
var tableEnd, programInfoLength, offset;
pmt.section_number = payload[6];
pmt.last_section_number = payload[7];
var sectionLength, tableEnd, programInfoLength, offset;
// PMTs can be sent ahead of the time when they should actually
// take effect. We don't believe this should ever be the case
......@@ -141,9 +138,11 @@ ParseStream = function() {
// overwrite any existing program map table
self.programMapTable = {};
// the mapping table ends right before the 32-bit CRC
tableEnd = payload.byteLength - 4;
// to determine where the table starts, we have to figure out how
// the mapping table ends at the end of the current section
sectionLength = (payload[1] & 0x0f) << 8 | payload[2];
tableEnd = 3 + sectionLength - 4;
// to determine where the table is, we have to figure out how
// long the program info descriptors are
programInfoLength = (payload[10] & 0x0f) << 8 | payload[11];
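// worked example (using the PMT fixture from the updated tests): with
// payload[1] === 0x00 and payload[2] === 0x17, sectionLength is
// (0x00 & 0x0f) << 8 | 0x17 === 23, so tableEnd is 3 + 23 - 4 === 22 and any
// stuffing bytes appended after the CRC are never read as stream entries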
......@@ -273,7 +272,7 @@ ProgramStream = function() {
data: [],
size: 0
},
flushStream = function(stream, type, pes) {
flushStream = function(stream, type) {
var
event = {
type: type,
......@@ -282,16 +281,6 @@ ProgramStream = function() {
i = 0,
fragment;
if ( pes !== undefined) {
// move over data from PES into Stream frame
event.pes = {};
event.pes.pts = pes.pts;
event.pes.dts = pes.dts;
event.pes.pid = pes.pid;
event.pes.dataAlignmentIndicator = pes.dataAlignmentIndicator;
event.pes.payloadUnitStartIndicator = pes.payloadUnitStartIndicator;
}
// do nothing if there is no buffered data
if (!stream.data.length) {
return;
......@@ -333,7 +322,7 @@ ProgramStream = function() {
// if a new packet is starting, we can flush the completed
// packet
if (data.payloadUnitStartIndicator) {
flushStream(stream, streamType, data);
flushStream(stream, streamType);
}
// buffer this fragment until we are sure we've received the
......@@ -358,8 +347,10 @@ ProgramStream = function() {
track.id = +k;
if (programMapTable[k] === H264_STREAM_TYPE) {
track.codec = 'avc';
track.type = 'video';
} else if (programMapTable[k] === ADTS_STREAM_TYPE) {
track.codec = 'adts';
track.type = 'audio';
}
event.tracks.push(track);
}
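// for the two-stream program in the updated tests, this produces a metadata
// event shaped like:
//   { type: 'metadata', tracks: [
//     { id: 1, codec: 'avc',  type: 'video' },
//     { id: 2, codec: 'adts', type: 'audio' }
//   ] }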
......@@ -412,9 +403,18 @@ H264Stream = function() {
self = this;
this.push = function(packet) {
if (packet.type === 'video') {
this.trigger('data', packet);
if (packet.type !== 'video') {
return;
}
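// in H.264, nal_unit_type is the low five bits of the first byte of the NAL
// unit; matching the whole byte works for the access unit delimiter because
// its nal_ref_idc bits are required to be zero (0x09 & 0x1f === 9)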
switch (packet.data[0]) {
case 0x09:
packet.nalUnitType = 'access_unit_delimiter_rbsp';
break;
default:
break;
}
this.trigger('data', packet);
};
};
H264Stream.prototype = new videojs.Hls.Stream();
......@@ -424,7 +424,14 @@ Transmuxer = function() {
var
self = this,
sequenceNumber = 0,
packetStream, parseStream, programStream, aacStream, h264Stream;
initialized = false,
videoSamples = [],
videoSamplesSize = 0,
packetStream, parseStream, programStream, aacStream, h264Stream,
flushVideo;
Transmuxer.prototype.init.call(this);
// set up the parsing pipeline
......@@ -439,13 +446,23 @@ Transmuxer = function() {
programStream.pipe(aacStream);
programStream.pipe(h264Stream);
// generate an init segment
this.initSegment = mp4.initSegment();
// helper functions
flushVideo = function() {
var moof, mdat, boxes, i, data;
moof = mp4.moof(sequenceNumber, []);
// concatenate the video data and construct the mdat
data = new Uint8Array(videoSamplesSize);
i = 0;
while (videoSamples.length) {
data.set(videoSamples[0].data, i);
i += videoSamples[0].data.byteLength;
videoSamples.shift();
}
videoSamplesSize = 0;
mdat = mp4.mdat(data);
h264Stream.on('data', function(data) {
var
moof = mp4.moof(sequenceNumber, []),
mdat = mp4.mdat(data.data),
// it would be great to allocate this array up front instead of
// throwing away hundreds of media segment fragments
boxes = new Uint8Array(moof.byteLength + mdat.byteLength);
......@@ -459,13 +476,41 @@ Transmuxer = function() {
self.trigger('data', {
data: boxes
});
};
// handle incoming data events
h264Stream.on('data', function(data) {
// if this chunk starts a new access unit, flush the data we've been buffering
if (data.nalUnitType === 'access_unit_delimiter_rbsp' &&
videoSamples.length) {
flushVideo();
}
// buffer video until we encounter a new access unit (aka the next frame)
videoSamples.push(data);
videoSamplesSize += data.data.byteLength;
});
programStream.on('data', function(data) {
// generate init segments based on stream metadata
if (!initialized && data.type === 'metadata') {
self.trigger('data', {
data: mp4.initSegment(data.tracks)
});
initialized = true;
}
});
// feed incoming data to the front of the parsing pipeline
this.push = function(data) {
packetStream.push(data);
};
// flush any buffered data
this.end = programStream.end;
this.end = function() {
programStream.end();
if (videoSamples.length) {
flushVideo();
}
};
};
Transmuxer.prototype = new videojs.Hls.Stream();
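// typical usage, mirroring the updated Transmuxer tests: push raw transport
// stream bytes in and collect init and media segments from 'data' events
//   var transmuxer = new Transmuxer();
//   transmuxer.on('data', function(segment) { /* segment.data is a Uint8Array */ });
//   transmuxer.push(bytes);
//   transmuxer.end(); // flush the final buffered access unit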
......
......@@ -41,7 +41,13 @@ test('generates a BSMFF ftyp', function() {
test('generates a moov', function() {
var boxes, mvhd, tkhd, mdhd, hdlr, minf, mvex,
data = mp4.moov(100, 600, 300, "video");
data = mp4.moov([{
id: 7,
duration: 100,
width: 600,
height: 300,
type: 'video'
}]);
ok(data, 'box is not null');
......@@ -53,12 +59,13 @@ test('generates a moov', function() {
mvhd = boxes[0].boxes[0];
equal(mvhd.type, 'mvhd', 'generated a mvhd');
equal(mvhd.duration, 100, 'wrote the movie header duration');
equal(mvhd.duration, 0xffffffff, 'wrote the maximum movie header duration');
equal(boxes[0].boxes[1].type, 'trak', 'generated a trak');
equal(boxes[0].boxes[1].boxes.length, 2, 'generated two track sub boxes');
tkhd = boxes[0].boxes[1].boxes[0];
equal(tkhd.type, 'tkhd', 'generated a tkhd');
equal(tkhd.trackId, 7, 'wrote the track id');
equal(tkhd.duration, 100, 'wrote duration into the track header');
equal(tkhd.width, 600, 'wrote width into the track header');
equal(tkhd.height, 300, 'wrote height into the track header');
......@@ -69,7 +76,7 @@ test('generates a moov', function() {
mdhd = boxes[0].boxes[1].boxes[1].boxes[0];
equal(mdhd.type, 'mdhd', 'generate an mdhd type');
equal(mdhd.language, 'und', 'wrote undetermined language');
equal(mdhd.duration, 100, 'wrote duraiton into the media header');
equal(mdhd.duration, 100, 'wrote duration into the media header');
hdlr = boxes[0].boxes[1].boxes[1].boxes[1];
equal(hdlr.type, 'hdlr', 'generate an hdlr type');
......@@ -206,7 +213,12 @@ test('generates a moov', function() {
test('generates a sound hdlr', function() {
var boxes, hdlr,
data = mp4.moov(100, 600, 300, "audio");
data = mp4.moov([{
duration: 100,
width: 600,
height: 300,
type: 'audio'
}]);
ok(data, 'box is not null');
......@@ -220,7 +232,12 @@ test('generates a sound hdlr', function() {
test('generates a video hdlr', function() {
var boxes, hdlr,
data = mp4.moov(100, 600, 300, "video");
data = mp4.moov([{
duration: 100,
width: 600,
height: 300,
type: 'video'
}]);
ok(data, 'box is not null');
......@@ -234,14 +251,40 @@ test('generates a video hdlr', function() {
test('generates an initialization segment', function() {
var
data = mp4.initSegment(),
init;
data = mp4.initSegment([{
id: 1,
width: 600,
height: 300,
type: 'video'
}, {
id: 2,
type: 'audio'
}]),
init, mvhd, trak1, trak2, mvex;
init = videojs.inspectMp4(data);
equal(init.length, 2, 'generated two boxes');
equal(init[0].type, 'ftyp', 'generated a ftyp box');
equal(init[1].type, 'moov', 'generated a moov box');
equal(init[1].boxes[0].duration, 0xffffffff, 'wrote a maximum duration');
mvhd = init[1].boxes[0];
equal(mvhd.type, 'mvhd', 'wrote an mvhd');
trak1 = init[1].boxes[1];
equal(trak1.type, 'trak', 'wrote a trak');
equal(trak1.boxes[0].trackId, 1, 'wrote the first track id');
equal(trak1.boxes[0].width, 600, 'wrote the first track width');
equal(trak1.boxes[0].height, 300, 'wrote the first track height');
equal(trak1.boxes[1].boxes[1].handlerType, 'vide', 'wrote the first track type');
trak2 = init[1].boxes[2];
equal(trak2.type, 'trak', 'wrote a trak');
equal(trak2.boxes[0].trackId, 2, 'wrote the second track id');
equal(trak2.boxes[1].boxes[1].handlerType, 'soun', 'wrote the second track type');
mvex = init[1].boxes[3];
equal(mvex.type, 'mvex', 'wrote an mvex');
});
test('generates a minimal moof', function() {
......
......@@ -27,6 +27,8 @@ var
parseStream,
ProgramStream = videojs.mp2t.ProgramStream,
programStream,
H264Stream = videojs.mp2t.H264Stream,
h264Stream,
Transmuxer = videojs.mp2t.Transmuxer,
transmuxer,
......@@ -37,7 +39,9 @@ var
PAT,
PMT,
standalonePes;
standalonePes,
videoPes;
module('MP2T Packet Stream', {
setup: function() {
......@@ -261,8 +265,8 @@ PMT = [
0x40, 0x10,
// tsc:01 afc:01 cc:0000 pointer_field:0000 0000
0x50, 0x00,
// tid:0000 0000 ssi:0 0:0 r:00 sl:0000 0010 1111
0x00, 0x00, 0x2f,
// tid:0000 0010 ssi:0 0:0 r:00 sl:0000 0001 0111
0x02, 0x00, 0x17,
// pn:0000 0000 0000 0001
0x00, 0x01,
// r:00 vn:00 000 cni:1 sn:0000 0000 lsn:0000 0000
......@@ -292,12 +296,13 @@ test('parse the elementary streams from a program map table', function() {
});
parseStream.pmtPid = 0x0010;
parseStream.push(new Uint8Array(PMT));
parseStream.push(new Uint8Array(PMT.concat(0, 0, 0, 0, 0)));
ok(packet, 'parsed a packet');
ok(parseStream.programMapTable, 'parsed a program map');
strictEqual(0x1b, parseStream.programMapTable[0x11], 'associated h264 with pid 0x11');
strictEqual(0x0f, parseStream.programMapTable[0x12], 'associated adts with pid 0x12');
strictEqual(parseStream.programMapTable[0], undefined, 'ignored trailing stuffing bytes');
deepEqual(parseStream.programMapTable, packet.programMapTable, 'recorded the PMT');
});
......@@ -386,7 +391,12 @@ test('parses an elementary stream packet with a pts and dts', function() {
equal(2 / 90, packet.dts, 'parsed the dts');
});
standalonePes = [
// helper function to create video PES packets
videoPes = function(data) {
if (data.length !== 2) {
throw new Error('video PES only accepts 2 byte payloads');
}
return [
0x47, // sync byte
// tei:0 pusi:1 tp:0 pid:0 0000 0001 0001
0x40, 0x11,
......@@ -433,10 +443,10 @@ standalonePes = [
// pdf:00 ef:1 erf:0 dtmf:0 acif:0 pcf:0 pef:0
0x20,
// phdl:0000 0000
0x00,
// "data":1010 1111 0000 0001
0xaf, 0x01
];
0x00
].concat(data);
};
standalonePes = videoPes([0xaf, 0x01]);
test('parses an elementary stream packet without a pts or dts', function() {
......@@ -503,10 +513,12 @@ test('parses metadata events from PSI packets', function() {
metadatas[0].tracks.sort(sortById);
deepEqual(metadatas[0].tracks, [{
id: 1,
codec: 'avc'
codec: 'avc',
type: 'video'
}, {
id: 2,
codec: 'adts'
codec: 'adts',
type: 'audio'
}], 'identified two tracks');
});
......@@ -628,6 +640,26 @@ test('flushes the buffered packets when a new one of that type is started', func
equal(7, packets[2].data.byteLength, 'parsed the audio payload');
});
module('H264 Stream', {
setup: function() {
h264Stream = new H264Stream();
}
});
test('parses nal unit types', function() {
var data;
h264Stream.on('data', function(event) {
data = event;
});
h264Stream.push({
type: 'video',
data: new Uint8Array([0x09])
});
ok(data, 'generated a data event');
equal(data.nalUnitType, 'access_unit_delimiter_rbsp', 'identified an access unit delimiter');
});
module('Transmuxer', {
setup: function() {
transmuxer = new Transmuxer();
......@@ -635,11 +667,48 @@ module('Transmuxer', {
});
test('generates an init segment', function() {
var segments = [];
transmuxer.on('data', function(segment) {
segments.push(segment);
});
transmuxer.push(packetize(PAT));
transmuxer.push(packetize(PMT));
transmuxer.push(packetize(standalonePes));
transmuxer.end();
ok(transmuxer.initSegment, 'has an init segment');
equal(segments.length, 2, 'has an init segment');
});
test('buffers video samples until an access unit', function() {
var samples = [], boxes;
transmuxer.on('data', function(data) {
samples.push(data);
});
transmuxer.push(packetize(PAT));
transmuxer.push(packetize(PMT));
// buffer a NAL
transmuxer.push(packetize(videoPes([0x09, 0x01])));
transmuxer.push(packetize(videoPes([0x00, 0x02])));
// an access_unit_delimiter_rbsp should flush the buffer
transmuxer.push(packetize(videoPes([0x09, 0x03])));
transmuxer.push(packetize(videoPes([0x00, 0x04])));
equal(samples.length, 2, 'emitted two events');
boxes = videojs.inspectMp4(samples[1].data);
equal(boxes.length, 2, 'generated two boxes');
equal(boxes[0].type, 'moof', 'the first box is a moof');
equal(boxes[1].type, 'mdat', 'the second box is a mdat');
deepEqual(new Uint8Array(samples[1].data.subarray(samples[1].data.length - 4)),
new Uint8Array([0x09, 0x01, 0x00, 0x02]),
'concatenated NALs into an mdat');
// flush the last access unit
transmuxer.end();
equal(samples.length, 3, 'flushed the final access unit');
deepEqual(new Uint8Array(samples[2].data.subarray(samples[2].data.length - 4)),
new Uint8Array([0x09, 0x03, 0x00, 0x04]),
'concatenated NALs into an mdat');
});
test('parses an example mp2t file and generates media segments', function() {
......@@ -653,23 +722,32 @@ test('parses an example mp2t file and generates media segments', function() {
transmuxer.push(window.bcSegment);
transmuxer.end();
ok(segments.length, 'generated media segments');
i = segments.length;
while (i--) {
boxes = videojs.inspectMp4(segments[i].data);
equal(boxes.length, 2, 'segments are composed of two boxes');
equal(boxes[0].type, 'moof', 'first box is a moof');
equal(boxes[0].boxes.length, 2, 'the moof has two children');
mfhd = boxes[0].boxes[0];
equal(segments.length, 2, 'generated two segments');
boxes = videojs.inspectMp4(segments[0].data);
equal(boxes.length, 2, 'init segments are composed of two boxes');
equal(boxes[0].type, 'ftyp', 'the first box is an ftyp');
equal(boxes[1].type, 'moov', 'the second box is a moov');
equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd');
equal(boxes[1].boxes[1].type, 'trak', 'generated a trak');
equal(boxes[1].boxes[2].type, 'trak', 'generated a second trak');
equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex');
boxes = videojs.inspectMp4(segments[1].data);
ok(boxes.length > 0, 'media segments are not empty');
ok(boxes.length % 2 === 0, 'media segments are composed of pairs of boxes');
for (i = 0; i < boxes.length; i += 2) {
equal(boxes[i].type, 'moof', 'first box is a moof');
equal(boxes[i].boxes.length, 2, 'the moof has two children');
mfhd = boxes[i].boxes[0];
equal(mfhd.type, 'mfhd', 'mfhd is a child of the moof');
ok(mfhd.sequenceNumber < sequenceNumber, 'sequence numbers are increasing');
sequenceNumber = mfhd.sequenceNumber;
traf = boxes[0].boxes[1];
traf = boxes[i].boxes[1];
equal(traf.type, 'traf', 'traf is a child of the moof');
equal(boxes[1].type, 'mdat', 'second box is an mdat');
equal(boxes[i + 1].type, 'mdat', 'second box is an mdat');
}
});
......