55ac868e by David LaPalomento

Fill out media dimensions in init segment

Add a parser to unpack NAL byte streams. Hook up the existing exponential Golomb decoder and parse media metadata out of the first sequence parameter set. Add more checks to the test on the example segment transformation.
1 parent 1b73bdb0
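
The change can be exercised end to end through the Transmuxer's streaming interface. A minimal sketch, assuming transportStreamBytes is a Uint8Array of 188-byte MPEG-2 TS packets (the variable names are illustrative and mirror the unit tests below):

var transmuxer = new Transmuxer(),
    segments = [];

// the first data event carries the init segment; once a sequence parameter
// set has been parsed, its dimensions are filled into the video track
transmuxer.on('data', function(segment) {
  segments.push(segment);
});

// feed transport stream packets into the front of the parsing pipeline
transmuxer.push(transportStreamBytes);

// flush any buffered video samples and emit the final media segment
transmuxer.end();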
@@ -14,7 +14,7 @@
(function(window, videojs, undefined) {
'use strict';
var PacketStream, ParseStream, ProgramStream, Transmuxer, AacStream, H264Stream, MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4;
var PacketStream, ParseStream, ProgramStream, Transmuxer, AacStream, H264Stream, NalByteStream, MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4;
MP2T_PACKET_LENGTH = 188; // bytes
H264_STREAM_TYPE = 0x1b;
@@ -285,6 +285,7 @@ ProgramStream = function() {
if (!stream.data.length) {
return;
}
event.trackId = stream.data[0].pid;
// reassemble the packet
while (stream.data.length) {
@@ -394,11 +395,84 @@ AacStream = function() {
AacStream.prototype = new videojs.Hls.Stream();
/**
* Accepts a NAL unit byte stream and unpacks the embedded NAL units.
*/
NalByteStream = function() {
var
i = 6,
// the first NAL unit is prefixed by an extra zero byte
syncPoint = 1,
buffer;
NalByteStream.prototype.init.call(this);
this.push = function(data) {
var swapBuffer;
if (!buffer) {
buffer = data.data;
} else {
swapBuffer = new Uint8Array(buffer.byteLength + data.data.byteLength);
swapBuffer.set(buffer);
swapBuffer.set(data.data, buffer.byteLength);
buffer = swapBuffer;
}
// scan for synchronization byte sequences (0x00 00 01)
// a match looks like this:
// 0 0 1 .. NAL .. 0 0 1
// ^ sync point        ^ i
while (i < buffer.byteLength) {
switch (buffer[i]) {
case 0:
i++;
break;
case 1:
// skip past non-sync sequences
if (buffer[i - 1] !== 0 ||
buffer[i - 2] !== 0) {
i += 3;
break;
}
// deliver the NAL unit
this.trigger('data', buffer.subarray(syncPoint + 3, i - 2));
syncPoint = i - 2;
i += 3;
break;
default:
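// the current byte is neither 0x00 nor 0x01, so no start code can end at
// i + 1 or i + 2 either; jump ahead three bytes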
i += 3;
break;
}
}
// filter out the NAL units that were delivered
buffer = buffer.subarray(syncPoint);
i -= syncPoint;
syncPoint = 0;
};
this.end = function() {
// deliver the last buffered NAL unit
if (buffer.byteLength > 3) {
this.trigger('data', buffer.subarray(syncPoint + 3));
}
};
};
NalByteStream.prototype = new videojs.Hls.Stream();
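// A minimal usage sketch, assuming the NalByteStream defined above: pushing
// a single Annex B buffer emits one 'data' event whose payload is the
// two-byte access unit delimiter [0x09, 0x07]
var exampleByteStream = new NalByteStream();
exampleByteStream.on('data', function(nalUnit) {
  // nalUnit is a Uint8Array view of the NAL unit without its start code
});
exampleByteStream.push({
  data: new Uint8Array([0x00, 0x00, 0x00, 0x01, 0x09, 0x07, 0x00, 0x00, 0x01])
});
exampleByteStream.end();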
/**
* Accepts input from a ProgramStream and emits data events for each H264
* NAL unit, annotated with its NAL unit type and, for sequence parameter
* sets, the parsed video dimensions.
*/
H264Stream = function() {
var self;
var
nalByteStream = new NalByteStream(),
self,
trackId,
readSequenceParameterSet,
skipScalingList;
H264Stream.prototype.init.call(this);
self = this;
@@ -406,16 +480,159 @@ H264Stream = function() {
if (packet.type !== 'video') {
return;
}
switch (packet.data[0]) {
trackId = packet.trackId;
nalByteStream.push(packet);
};
nalByteStream.on('data', function(data) {
var event = {
trackId: trackId,
data: data
};
switch (data[0] & 0x1f) {
case 0x09:
packet.nalUnitType = 'access_unit_delimiter_rbsp';
event.nalUnitType = 'access_unit_delimiter_rbsp';
break;
case 0x07:
event.nalUnitType = 'seq_parameter_set_rbsp';
event.dimensions = readSequenceParameterSet(data.subarray(1));
break;
default:
break;
}
this.trigger('data', packet);
self.trigger('data', event);
});
this.end = function() {
nalByteStream.end();
};
/**
* Advance the ExpGolomb decoder past a scaling list. The scaling
* list is optionally transmitted as part of a sequence parameter
* set and is not relevant to transmuxing.
* @param count {number} the number of entries in this scaling list
* @param expGolombDecoder {object} an ExpGolomb decoder pointing to the
* start of a scaling list
* @see Recommendation ITU-T H.264, Section 7.3.2.1.1.1
*/
skipScalingList = function(count, expGolombDecoder) {
var
lastScale = 8,
nextScale = 8,
j,
deltaScale;
for (j = 0; j < count; j++) {
if (nextScale !== 0) {
deltaScale = expGolombDecoder.readExpGolomb();
nextScale = (lastScale + deltaScale + 256) % 256;
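// e.g. with a lastScale of 8, a deltaScale of -8 yields
// (8 - 8 + 256) % 256 = 0, so the remaining entries reuse the last scale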
}
lastScale = (nextScale === 0) ? lastScale : nextScale;
}
};
/**
* Read a sequence parameter set and return some interesting video
* properties. A sequence parameter set is the H264 metadata that
* describes the properties of upcoming video frames.
* @param data {Uint8Array} the bytes of a sequence parameter set
* @return {object} an object with width and height properties
* specifying the dimensions of the associated video frames.
*/
readSequenceParameterSet = function(data) {
var
frameCropLeftOffset = 0,
frameCropRightOffset = 0,
frameCropTopOffset = 0,
frameCropBottomOffset = 0,
expGolombDecoder, profileIdc, chromaFormatIdc, picOrderCntType,
numRefFramesInPicOrderCntCycle, picWidthInMbsMinus1,
picHeightInMapUnitsMinus1, frameMbsOnlyFlag,
scalingListCount,
i;
expGolombDecoder = new videojs.Hls.ExpGolomb(data);
profileIdc = expGolombDecoder.readUnsignedByte(); // profile_idc
// constraint_set[0-5]_flag, u(1), reserved_zero_2bits u(2), level_idc u(8)
expGolombDecoder.skipBits(16);
expGolombDecoder.skipUnsignedExpGolomb(); // seq_parameter_set_id
// some profiles have more optional data we don't need
if (profileIdc === 100 ||
profileIdc === 110 ||
profileIdc === 122 ||
profileIdc === 244 ||
profileIdc === 44 ||
profileIdc === 83 ||
profileIdc === 86 ||
profileIdc === 118 ||
profileIdc === 128) {
chromaFormatIdc = expGolombDecoder.readUnsignedExpGolomb();
if (chromaFormatIdc === 3) {
expGolombDecoder.skipBits(1); // separate_colour_plane_flag
}
expGolombDecoder.skipUnsignedExpGolomb(); // bit_depth_luma_minus8
expGolombDecoder.skipUnsignedExpGolomb(); // bit_depth_chroma_minus8
expGolombDecoder.skipBits(1); // qpprime_y_zero_transform_bypass_flag
if (expGolombDecoder.readBoolean()) { // seq_scaling_matrix_present_flag
scalingListCount = (chromaFormatIdc !== 3) ? 8 : 12;
for (i = 0; i < scalingListCount; i++) {
if (expGolombDecoder.readBoolean()) { // seq_scaling_list_present_flag[ i ]
if (i < 6) {
skipScalingList(16, expGolombDecoder);
} else {
skipScalingList(64, expGolombDecoder);
}
}
}
}
}
expGolombDecoder.skipUnsignedExpGolomb(); // log2_max_frame_num_minus4
picOrderCntType = expGolombDecoder.readUnsignedExpGolomb();
if (picOrderCntType === 0) {
expGolombDecoder.readUnsignedExpGolomb(); //log2_max_pic_order_cnt_lsb_minus4
} else if (picOrderCntType === 1) {
expGolombDecoder.skipBits(1); // delta_pic_order_always_zero_flag
expGolombDecoder.skipExpGolomb(); // offset_for_non_ref_pic
expGolombDecoder.skipExpGolomb(); // offset_for_top_to_bottom_field
numRefFramesInPicOrderCntCycle = expGolombDecoder.readUnsignedExpGolomb();
for(i = 0; i < numRefFramesInPicOrderCntCycle; i++) {
expGolombDecoder.skipExpGolomb(); // offset_for_ref_frame[ i ]
}
}
expGolombDecoder.skipUnsignedExpGolomb(); // max_num_ref_frames
expGolombDecoder.skipBits(1); // gaps_in_frame_num_value_allowed_flag
picWidthInMbsMinus1 = expGolombDecoder.readUnsignedExpGolomb();
picHeightInMapUnitsMinus1 = expGolombDecoder.readUnsignedExpGolomb();
frameMbsOnlyFlag = expGolombDecoder.readBits(1);
if (frameMbsOnlyFlag === 0) {
expGolombDecoder.skipBits(1); // mb_adaptive_frame_field_flag
}
expGolombDecoder.skipBits(1); // direct_8x8_inference_flag
if (expGolombDecoder.readBoolean()) { // frame_cropping_flag
frameCropLeftOffset = expGolombDecoder.readUnsignedExpGolomb();
frameCropRightOffset = expGolombDecoder.readUnsignedExpGolomb();
frameCropTopOffset = expGolombDecoder.readUnsignedExpGolomb();
frameCropBottomOffset = expGolombDecoder.readUnsignedExpGolomb();
}
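// as a worked example with hypothetical values: a pic_width_in_mbs_minus1
// of 23 and no horizontal cropping gives ((23 + 1) * 16) = 384 pixels of
// width; with frame_mbs_only_flag set to 1 and pic_height_in_map_units_minus1
// of 17, the height is ((2 - 1) * (17 + 1) * 16) = 288 pixels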
return {
width: ((picWidthInMbsMinus1 + 1) * 16) - frameCropLeftOffset * 2 - frameCropRightOffset * 2,
height: ((2 - frameMbsOnlyFlag) * (picHeightInMapUnitsMinus1 + 1) * 16) - (frameCropTopOffset * 2) - (frameCropBottomOffset * 2)
};
};
};
H264Stream.prototype = new videojs.Hls.Stream();
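// A minimal usage sketch, assuming the H264Stream defined above: pushing a
// video packet that frames an access unit delimiter and then calling end()
// produces a data event tagged with the NAL unit type; a sequence parameter
// set would additionally carry a dimensions property
var exampleH264Stream = new H264Stream();
exampleH264Stream.on('data', function(event) {
  // event.nalUnitType is 'access_unit_delimiter_rbsp' for the push below
});
exampleH264Stream.push({
  type: 'video',
  data: new Uint8Array([0x00, 0x00, 0x00, 0x01, 0x09, 0x07])
});
// end() flushes the final buffered NAL unit
exampleH264Stream.end();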
@@ -424,9 +641,10 @@ Transmuxer = function() {
var
self = this,
sequenceNumber = 0,
initialized = false,
videoSamples = [],
videoSamplesSize = 0,
tracks,
dimensions,
packetStream, parseStream, programStream, aacStream, h264Stream,
@@ -446,6 +664,42 @@ Transmuxer = function() {
programStream.pipe(aacStream);
programStream.pipe(h264Stream);
// handle incoming data events
h264Stream.on('data', function(data) {
var i;
// if this chunk starts a new access unit, flush the data we've been buffering
if (data.nalUnitType === 'access_unit_delimiter_rbsp' &&
videoSamples.length) {
//flushVideo();
}
// generate an init segment once all the metadata is available
if (data.nalUnitType === 'seq_parameter_set_rbsp' &&
!dimensions) {
dimensions = data.dimensions;
i = tracks.length;
while (i--) {
if (tracks[i].type === 'video') {
tracks[i].width = dimensions.width;
tracks[i].height = dimensions.height;
}
}
self.trigger('data', {
data: videojs.mp4.initSegment(tracks)
});
}
// buffer video until we encounter a new access unit (aka the next frame)
videoSamples.push(data);
videoSamplesSize += data.data.byteLength;
});
programStream.on('data', function(data) {
if (data.type === 'metadata') {
tracks = data.tracks;
}
});
// helper functions
flushVideo = function() {
var moof, mdat, boxes, i, data;
@@ -478,28 +732,6 @@
});
};
// handle incoming data events
h264Stream.on('data', function(data) {
// if this chunk starts a new access unit, flush the data we've been buffering
if (data.nalUnitType === 'access_unit_delimiter_rbsp' &&
videoSamples.length) {
flushVideo();
}
// buffer video until we encounter a new access unit (aka the next frame)
videoSamples.push(data);
videoSamplesSize += data.data.byteLength;
});
programStream.on('data', function(data) {
// generate init segments based on stream metadata
if (!initialized && data.type === 'metadata') {
self.trigger('data', {
data: mp4.initSegment(data.tracks)
});
initialized = true;
}
});
// feed incoming data to the front of the parsing pipeline
this.push = function(data) {
packetStream.push(data);
@@ -507,6 +739,7 @@
// flush any buffered data
this.end = function() {
programStream.end();
h264Stream.end();
if (videoSamples.length) {
flushVideo();
}
@@ -40,6 +40,7 @@ var
PAT,
PMT,
standalonePes,
validateTrack,
videoPes;
@@ -392,48 +393,31 @@ test('parses an elementary stream packet with a pts and dts', function() {
});
// helper function to create video PES packets
videoPes = function(data) {
if (data.length !== 2) {
throw new Error('video PES only accepts 2 byte payloads');
}
return [
0x47, // sync byte
videoPes = function(data, first) {
var
adaptationFieldLength = 188 - data.length - (first ? 18 : 17),
result = [
// sync byte
0x47,
// tei:0 pusi:1 tp:0 pid:0 0000 0001 0001
0x40, 0x11,
// tsc:01 afc:11 cc:0000
0x70,
// afl:1010 1100
0xac,
0x70
].concat([
// afl
adaptationFieldLength & 0xff,
// di:0 rai:0 espi:0 pf:0 of:0 spf:0 tpdf:0 afef:0
0x00,
// stuffing_bytes (171 bytes)
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff,
0x00
]),
i;
i = adaptationFieldLength - 1;
while (i--) {
// stuffing_bytes
result.push(0xff);
}
result = result.concat([
// pscp:0000 0000 0000 0000 0000 0001
0x00, 0x00, 0x01,
// sid:0000 0000 ppl:0000 0000 0000 0101
@@ -444,9 +428,17 @@ videoPes = function(data) {
0x20,
// phdl:0000 0000
0x00
].concat(data);
]);
if (first) {
result.push(0x00);
}
result = result.concat([
// NAL unit start code
0x00, 0x00, 0x01
].concat(data));
return result;
};
standalonePes = videoPes([0xaf, 0x01]);
standalonePes = videoPes([0xaf, 0x01], true);
test('parses an elementary stream packet without a pts or dts', function() {
@@ -465,9 +457,9 @@ test('parses an elementary stream packet without a pts or dts', function() {
ok(packet, 'parsed a packet');
equal('pes', packet.type, 'recognized a PES packet');
equal(0x1b, packet.streamType, 'tracked the stream_type');
equal(2, packet.data.byteLength, 'parsed two data bytes');
equal(0xaf, packet.data[0], 'parsed the first data byte');
equal(0x01, packet.data[1], 'parsed the second data byte');
equal(2 + 4, packet.data.byteLength, 'parsed the NAL start code and data bytes');
equal(0xaf, packet.data[packet.data.length - 2], 'parsed the first data byte');
equal(0x01, packet.data[packet.data.length - 1], 'parsed the second data byte');
ok(!packet.pts, 'did not parse a pts');
ok(!packet.dts, 'did not parse a dts');
});
@@ -645,6 +637,85 @@ module('H264 Stream', {
h264Stream = new H264Stream();
}
});
test('unpacks nal units from simple byte stream framing', function() {
var data;
h264Stream.on('data', function(event) {
data = event;
});
// the simplest byte stream framing:
h264Stream.push({
type: 'video',
data: new Uint8Array([
0x00, 0x00, 0x00, 0x01,
0x09, 0x07,
0x00, 0x00, 0x01
])
});
ok(data, 'generated a data event');
equal(data.nalUnitType, 'access_unit_delimiter_rbsp', 'identified an access unit delimiter');
equal(data.data.length, 2, 'calculated nal unit length');
equal(data.data[1], 7, 'read a payload byte');
});
test('unpacks nal units from byte streams split across pushes', function() {
var data;
h264Stream.on('data', function(event) {
data = event;
});
// handles byte streams split across pushes
h264Stream.push({
type: 'video',
data: new Uint8Array([
0x00, 0x00, 0x00, 0x01,
0x09])
});
ok(!data, 'buffers NAL units across events');
h264Stream.push({
type: 'video',
data: new Uint8Array([
0x07,
0x00, 0x00, 0x01
])
});
ok(data, 'generated a data event');
equal(data.nalUnitType, 'access_unit_delimiter_rbsp', 'identified an access unit delimiter');
equal(data.data.length, 2, 'calculated nal unit length');
equal(data.data[1], 7, 'read a payload byte');
});
test('unpacks nal units from byte streams with split sync points', function() {
var data;
h264Stream.on('data', function(event) {
data = event;
});
// handles sync points split across pushes
h264Stream.push({
type: 'video',
data: new Uint8Array([
0x00, 0x00, 0x00, 0x01,
0x09, 0x07,
0x00])
});
ok(!data, 'buffers NAL units across events');
h264Stream.push({
type: 'video',
data: new Uint8Array([
0x00, 0x01
])
});
ok(data, 'generated a data event');
equal(data.nalUnitType, 'access_unit_delimiter_rbsp', 'identified an access unit delimiter');
equal(data.data.length, 2, 'calculated nal unit length');
equal(data.data[1], 7, 'read a payload byte');
});
test('parses nal unit types', function() {
var data;
h264Stream.on('data', function(event) {
@@ -653,11 +724,32 @@ test('parses nal unit types', function() {
h264Stream.push({
type: 'video',
data: new Uint8Array([0x09])
data: new Uint8Array([
0x00, 0x00, 0x00, 0x01,
0x09
])
});
h264Stream.end();
ok(data, 'generated a data event');
equal(data.nalUnitType, 'access_unit_delimiter_rbsp', 'identified an access unit delimiter');
data = null;
h264Stream.push({
type: 'video',
data: new Uint8Array([
0x00, 0x00, 0x00, 0x01,
0x07,
0x27, 0x42, 0xe0, 0x0b,
0xa9, 0x18, 0x60, 0x9d,
0x80, 0x35, 0x06, 0x01,
0x06, 0xb6, 0xc2, 0xb5,
0xef, 0x7c, 0x04
])
});
h264Stream.end();
ok(data, 'generated a data event');
equal(data.nalUnitType, 'seq_parameter_set_rbsp', 'identified a sequence parameter set');
});
module('Transmuxer', {
@@ -673,13 +765,20 @@ test('generates an init segment', function() {
});
transmuxer.push(packetize(PAT));
transmuxer.push(packetize(PMT));
transmuxer.push(packetize(standalonePes));
transmuxer.push(packetize(videoPes([
0x07,
0x27, 0x42, 0xe0, 0x0b,
0xa9, 0x18, 0x60, 0x9d,
0x80, 0x53, 0x06, 0x01,
0x06, 0xb6, 0xc2, 0xb5,
0xef, 0x7c, 0x04
], true)));
transmuxer.end();
equal(segments.length, 2, 'has an init segment');
});
test('buffers video samples until an access unit', function() {
test('buffers video samples until ended', function() {
var samples = [], boxes;
transmuxer.on('data', function(data) {
samples.push(data);
@@ -688,34 +787,68 @@ test('buffers video samples until an access unit', function() {
transmuxer.push(packetize(PMT));
// buffer a NAL
transmuxer.push(packetize(videoPes([0x09, 0x01])));
transmuxer.push(packetize(videoPes([0x09, 0x01], true)));
transmuxer.push(packetize(videoPes([0x00, 0x02])));
// an access_unit_delimiter_rbsp should flush the buffer
// add an access_unit_delimiter_rbsp
transmuxer.push(packetize(videoPes([0x09, 0x03])));
transmuxer.push(packetize(videoPes([0x00, 0x04])));
equal(samples.length, 2, 'emitted two events');
boxes = videojs.inspectMp4(samples[1].data);
transmuxer.push(packetize(videoPes([0x00, 0x05])));
// flush everything
transmuxer.end();
equal(samples.length, 1, 'emitted one event');
boxes = videojs.inspectMp4(samples[0].data);
equal(boxes.length, 2, 'generated two boxes');
equal(boxes[0].type, 'moof', 'the first box is a moof');
equal(boxes[1].type, 'mdat', 'the second box is a mdat');
deepEqual(new Uint8Array(samples[1].data.subarray(samples[1].data.length - 4)),
new Uint8Array([0x09, 0x01, 0x00, 0x02]),
'concatenated NALs into an mdat');
// flush the last access unit
transmuxer.end();
equal(samples.length, 3, 'flushed the final access unit');
deepEqual(new Uint8Array(samples[2].data.subarray(samples[2].data.length - 4)),
new Uint8Array([0x09, 0x03, 0x00, 0x04]),
deepEqual(new Uint8Array(samples[0].data.subarray(samples[0].data.length - 10)),
new Uint8Array([
0x09, 0x01,
0x00, 0x02,
0x09, 0x03,
0x00, 0x04,
0x00, 0x05]),
'concatenated NALs into an mdat');
});
validateTrack = function(track, metadata) {
var mdia, handlerType;
equal(track.type, 'trak', 'wrote the track type');
equal(track.boxes.length, 2, 'wrote track children');
equal(track.boxes[0].type, 'tkhd', 'wrote the track header');
if (metadata) {
if (metadata.trackId) {
equal(track.boxes[0].trackId, metadata.trackId, 'wrote the track id');
}
if (metadata.width) {
equal(track.boxes[0].width, metadata.width, 'wrote the width');
}
if (metadata.height) {
equal(track.boxes[0].height, metadata.height, 'wrote the height');
}
}
mdia = track.boxes[1];
equal(mdia.type, 'mdia', 'wrote the media');
equal(mdia.boxes.length, 3, 'wrote the mdia children');
equal(mdia.boxes[0].type, 'mdhd', 'wrote the media header');
equal(mdia.boxes[0].language, 'und', 'the language is undefined');
equal(mdia.boxes[0].duration, 0xffffffff, 'the duration is at maximum');
equal(mdia.boxes[1].type, 'hdlr', 'wrote the media handler');
handlerType = mdia.boxes[1].handlerType;
equal(mdia.boxes[2].type, 'minf', 'wrote the media info');
};
test('parses an example mp2t file and generates media segments', function() {
var
segments = [],
sequenceNumber = window.Infinity,
i, boxes, mfhd, traf;
transmuxer.on('data', function(segment) {
segments.push(segment);
});
@@ -729,8 +862,14 @@ test('parses an example mp2t file and generates media segments', function() {
equal(boxes[0].type, 'ftyp', 'the first box is an ftyp');
equal(boxes[1].type, 'moov', 'the second box is a moov');
equal(boxes[1].boxes[0].type, 'mvhd', 'generated an mvhd');
equal(boxes[1].boxes[1].type, 'trak', 'generated a trak');
equal(boxes[1].boxes[2].type, 'trak', 'generated a second trak');
validateTrack(boxes[1].boxes[1], {
trackId: 256,
width: 388,
height: 300
});
validateTrack(boxes[1].boxes[2], {
trackId: 257
});
equal(boxes[1].boxes[3].type, 'mvex', 'generated an mvex');
boxes = videojs.inspectMp4(segments[1].data);