54ca5993 by David LaPalomento

Translate H.264 Annex B streams to AVC elementary streams

Fragmented MP4s expect H264 data in mdats to be a sequence of NAL units, each preceded by its length. MPEG2-TS streams transmit H264 data using start codes to signal the beginning of a NAL. When parsing NALs to build the sample table, reformat them to remove start codes and add length fields. Pulled the video-related program stream parsing into its own stream object because Transmuxer was getting too busy.
1 parent 31e49fcd
......@@ -365,7 +365,7 @@ stsd = function(track) {
track.profileIdc, // AVCProfileIndication
track.profileCompatibility, // profile_compatibility
track.levelIdc, // AVCLevelIndication
0xff // lengthSizeMinusOne
0xff // lengthSizeMinusOne, hard-coded to 4 bytes
].concat([
track.sps.length // numOfSequenceParameterSets
]).concat(sequenceParameterSets).concat([
......@@ -438,7 +438,7 @@ traf = function(track) {
0x00, 0x00, 0x00, 0x00 // baseMediaDecodeTime
])),
trun(track,
sdtp.length +
sampleDependencyTable.length +
16 + // tfhd
16 + // tfdt
8 + // traf header
......
......@@ -14,7 +14,10 @@
(function(window, videojs, undefined) {
'use strict';
var PacketStream, ParseStream, ProgramStream, Transmuxer, AacStream, H264Stream, NalByteStream, MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4;
var
PacketStream, ParseStream, ProgramStream, VideoSegmentStream,
Transmuxer, AacStream, H264Stream, NalByteStream,
MP2T_PACKET_LENGTH, H264_STREAM_TYPE, ADTS_STREAM_TYPE, mp4;
MP2T_PACKET_LENGTH = 188; // bytes
H264_STREAM_TYPE = 0x1b;
......@@ -402,7 +405,6 @@ AacStream.prototype = new videojs.Hls.Stream();
NalByteStream = function() {
var
i = 6,
// the first NAL unit is prefixed by an extra zero byte
syncPoint = 1,
buffer;
NalByteStream.prototype.init.call(this);
......@@ -419,16 +421,37 @@ NalByteStream = function() {
buffer = swapBuffer;
}
// scan for synchronization byte sequences (0x00 00 01)
// Rec. ITU-T H.264, Annex B
// scan for NAL unit boundaries
// a match looks like this:
// 0 0 1 .. NAL .. 0 0 1
// ^ sync point ^ i
// or this:
// 0 0 1 .. NAL .. 0 0 0
// ^ sync point ^ i
while (i < buffer.byteLength) {
switch (buffer[i]) {
case 0:
// skip past non-sync sequences
if (buffer[i - 1] !== 0) {
i += 2;
break;
} else if (buffer[i - 2] !== 0) {
i++;
break;
}
// deliver the NAL unit
this.trigger('data', buffer.subarray(syncPoint + 3, i - 2));
// drop trailing zeroes
do {
i++;
} while (buffer[i] !== 1);
syncPoint = i - 2;
i += 3;
break;
case 1:
// skip past non-sync sequences
if (buffer[i - 1] !== 0 ||
......@@ -463,8 +486,8 @@ NalByteStream = function() {
NalByteStream.prototype = new videojs.Hls.Stream();
/**
* Accepts a ProgramStream and emits data events with parsed
* AAC Audio Frames of the individual packets.
* Accepts input from a ProgramStream and produces H.264 NAL unit data
* events.
*/
H264Stream = function() {
var
......@@ -657,98 +680,30 @@ H264Stream = function() {
};
H264Stream.prototype = new videojs.Hls.Stream();
Transmuxer = function() {
/**
* @param track {object} track metadata configuration
*/
VideoSegmentStream = function(track) {
var
self = this,
sequenceNumber = 0,
videoSamples = [],
videoSamplesSize = 0,
track,
config,
pps,
packetStream, parseStream, programStream, aacStream, h264Stream,
flushVideo;
Transmuxer.prototype.init.call(this);
// set up the parsing pipeline
packetStream = new PacketStream();
parseStream = new ParseStream();
programStream = new ProgramStream();
aacStream = new AacStream();
h264Stream = new H264Stream();
packetStream.pipe(parseStream);
parseStream.pipe(programStream);
programStream.pipe(aacStream);
programStream.pipe(h264Stream);
nalUnits = [],
nalUnitsLength = 0;
VideoSegmentStream.prototype.init.call(this);
// handle incoming data events
h264Stream.on('data', function(data) {
// if this chunk starts a new access unit, flush the data we've been buffering
if (data.nalUnitType === 'access_unit_delimiter_rbsp' &&
videoSamples.length) {
//flushVideo();
}
// record the track config
if (data.nalUnitType === 'seq_parameter_set_rbsp' &&
!config) {
config = data.config;
track.width = config.width;
track.height = config.height;
track.sps = [data.data];
track.profileIdc = config.profileIdc;
track.levelIdc = config.levelIdc;
track.profileCompatibility = config.profileCompatibility;
// generate an init segment once all the metadata is available
if (pps) {
self.trigger('data', {
data: videojs.mp4.initSegment([track])
});
}
}
if (data.nalUnitType === 'pic_parameter_set_rbsp' &&
!pps) {
pps = data.data;
track.pps = [data.data];
if (config) {
self.trigger('data', {
data: videojs.mp4.initSegment([track])
});
}
}
// buffer video until we encounter a new access unit (aka the next frame)
videoSamples.push(data);
videoSamplesSize += data.data.byteLength;
});
programStream.on('data', function(data) {
var i;
if (data.type === 'metadata') {
i = data.tracks.length;
while (i--) {
if (data.tracks[i].type === 'video') {
track = data.tracks[i];
break;
}
}
}
});
/**
 * Queue a single parsed NAL unit for the next segment.
 * Nothing is emitted here; the queued units are consumed by end().
 * @param data {object} a NAL unit event; its `data` property must
 * expose `byteLength`, which is added to the running byte total
 * used to size the output buffer in end().
 */
this.push = function(data) {
// buffer video until end() is called
nalUnits.push(data);
// running total of payload bytes only; end() adds 4 bytes per NAL
// unit for the length fields when it sizes the buffer
nalUnitsLength += data.data.byteLength;
};
// helper functions
flushVideo = function() {
var startUnit, currentNal, moof, mdat, boxes, i, data, sample;
this.end = function() {
var startUnit, currentNal, moof, mdat, boxes, i, data, view, sample;
// concatenate the video data and construct the mdat
// first, we have to build the index from byte locations to
// samples (i.e. frames) in the video data
data = new Uint8Array(videoSamplesSize);
// samples (that is, frames) in the video data
data = new Uint8Array(nalUnitsLength + (4 * nalUnits.length));
view = new DataView(data.buffer);
track.samples = [];
sample = {
size: 0,
......@@ -761,8 +716,8 @@ Transmuxer = function() {
}
};
i = 0;
while (videoSamples.length) {
currentNal = videoSamples[0];
while (nalUnits.length) {
currentNal = nalUnits[0];
// flush the sample we've been building when a new sample is started
if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') {
if (startUnit) {
......@@ -785,20 +740,23 @@ Transmuxer = function() {
if (currentNal.nalUnitType === 'slice_layer_without_partitioning_rbsp_idr') {
// the current sample is a key frame
sample.flags.dependsOn = 2;
}
sample.size += 4; // space for the NAL length
sample.size += currentNal.data.byteLength;
view.setUint32(i, currentNal.data.byteLength);
i += 4;
data.set(currentNal.data, i);
i += currentNal.data.byteLength;
videoSamples.shift();
nalUnits.shift();
}
// record the last sample
if (track.samples.length) {
sample.duration = track.samples[track.samples.length - 1].duration;
}
track.samples.push(sample);
videoSamplesSize = 0;
nalUnitsLength = 0;
mdat = mp4.mdat(data);
moof = mp4.moof(sequenceNumber, [track]);
......@@ -813,10 +771,89 @@ Transmuxer = function() {
boxes.set(moof);
boxes.set(mdat, moof.byteLength);
this.trigger('data', boxes);
};
};
VideoSegmentStream.prototype = new videojs.Hls.Stream();
Transmuxer = function() {
var
self = this,
track,
config,
pps,
packetStream, parseStream, programStream, aacStream, h264Stream, videoSegmentStream;
Transmuxer.prototype.init.call(this);
// set up the parsing pipeline
packetStream = new PacketStream();
parseStream = new ParseStream();
programStream = new ProgramStream();
aacStream = new AacStream();
h264Stream = new H264Stream();
packetStream.pipe(parseStream);
parseStream.pipe(programStream);
programStream.pipe(aacStream);
programStream.pipe(h264Stream);
// handle incoming data events
h264Stream.on('data', function(data) {
  var
    nalType = data.nalUnitType,
    // publish an init segment describing the current video track
    emitInitSegment = function() {
      self.trigger('data', {
        data: videojs.mp4.initSegment([track])
      });
    };

  // the first sequence parameter set supplies the track configuration
  if (nalType === 'seq_parameter_set_rbsp' && !config) {
    config = data.config;

    track.width = config.width;
    track.height = config.height;
    track.sps = [data.data];
    track.profileIdc = config.profileIdc;
    track.levelIdc = config.levelIdc;
    track.profileCompatibility = config.profileCompatibility;

    // the init segment needs both parameter sets; emit it now if the
    // picture parameter set has already been seen
    if (pps) {
      emitInitSegment();
    }
    return;
  }

  // the first picture parameter set completes the track metadata
  if (nalType === 'pic_parameter_set_rbsp' && !pps) {
    pps = data.data;
    track.pps = [data.data];

    if (config) {
      emitInitSegment();
    }
  }
});
// hook up the video segment stream once track metadata is delivered
programStream.on('data', function(data) {
var i, triggerData = function(segment) {
self.trigger('data', {
data: boxes
data: segment
});
};
if (data.type === 'metadata') {
i = data.tracks.length;
while (i--) {
if (data.tracks[i].type === 'video') {
track = data.tracks[i];
if (!videoSegmentStream) {
videoSegmentStream = new VideoSegmentStream(track);
h264Stream.pipe(videoSegmentStream);
videoSegmentStream.on('data', triggerData);
}
break;
}
}
}
});
// feed incoming data to the front of the parsing pipeline
this.push = function(data) {
......@@ -826,9 +863,7 @@ Transmuxer = function() {
this.end = function() {
programStream.end();
h264Stream.end();
if (videoSamples.length) {
flushVideo();
}
// the video segment stream is only created once metadata describing
// a video track has been received, so it may not exist when the
// transmuxer is ended (for example, input with no video track)
if (videoSegmentStream) {
  videoSegmentStream.end();
}
};
};
Transmuxer.prototype = new videojs.Hls.Stream();
......@@ -841,6 +876,7 @@ window.videojs.mp2t = {
PacketStream: PacketStream,
ParseStream: ParseStream,
ProgramStream: ProgramStream,
VideoSegmentStream: VideoSegmentStream,
Transmuxer: Transmuxer,
AacStream: AacStream,
H264Stream: H264Stream
......
......@@ -29,6 +29,8 @@ var
programStream,
H264Stream = videojs.mp2t.H264Stream,
h264Stream,
VideoSegmentStream = videojs.mp2t.VideoSegmentStream,
videoSegmentStream,
Transmuxer = videojs.mp2t.Transmuxer,
transmuxer,
......@@ -782,6 +784,103 @@ test('parses nal unit types', function() {
equal(data.nalUnitType, 'slice_layer_without_partitioning_rbsp_idr', 'identified a key frame');
});
// MP4 expects H264 (aka AVC) data to be in storage format. Storage
// format is optimized for reliable, random-access media in contrast
// to the byte stream format that retransmits metadata regularly to
// allow decoders to quickly begin operation from wherever in the
// broadcast they begin receiving.
// Details on the byte stream format can be found in Annex B of
// Recommendation ITU-T H.264.
// The storage format is described in ISO/IEC 14496-15
test('strips byte stream framing during parsing', function() {
  // every NAL unit event emitted by the stream, in arrival order
  var data = [];
  h264Stream.on('data', function(event) {
    data.push(event);
  });

  // two Annex B framed NAL units; the first is followed by trailing
  // zero padding that must not appear in the parsed output
  h264Stream.push({
    type: 'video',
    data: new Uint8Array([
      // -- NAL unit start
      // zero_byte
      0x00,
      // start_code_prefix_one_3bytes
      0x00, 0x00, 0x01,
      // nal_unit_type (picture parameter set)
      0x08,
      // fake data
      0x01, 0x02, 0x03, 0x04,
      0x05, 0x06, 0x07,
      // trailing_zero_8bits * 5
      0x00, 0x00, 0x00, 0x00,
      0x00,

      // -- NAL unit start
      // zero_byte
      0x00,
      // start_code_prefix_one_3bytes
      0x00, 0x00, 0x01,
      // nal_unit_type (access_unit_delimiter_rbsp)
      0x09,
      // fake data
      0x06, 0x05, 0x04, 0x03,
      0x02, 0x01, 0x00
    ])
  });
  h264Stream.end();

  equal(data.length, 2, 'parsed two NAL units');

  // QUnit's signature is deepEqual(actual, expected, message); passing
  // the parsed bytes first keeps failure reports labelled correctly
  deepEqual(data[0].data, new Uint8Array([
    0x08,
    0x01, 0x02, 0x03, 0x04,
    0x05, 0x06, 0x07
  ]), 'parsed the first NAL unit');
  deepEqual(data[1].data, new Uint8Array([
    0x09,
    0x06, 0x05, 0x04, 0x03,
    0x02, 0x01, 0x00
  ]), 'parsed the second NAL unit');
});
// Test module for VideoSegmentStream: the setup hook replaces the shared
// videoSegmentStream variable with a new instance constructed from an
// empty track object.
module('VideoSegmentStream', {
setup: function() {
videoSegmentStream = new VideoSegmentStream({});
}
});
// see ISO/IEC 14496-15, Section 5 "AVC elementary streams and sample definitions"
test('concatenates NAL units into AVC elementary streams', function() {
  var
    firstNal = new Uint8Array([
      0x08,
      0x01, 0x02, 0x03
    ]),
    secondNal = new Uint8Array([
      0x08,
      0x04, 0x03, 0x02, 0x01, 0x00
    ]),
    // each NAL unit is expected to be preceded by its length as a
    // four-byte big-endian integer
    expectedPayload = new Uint8Array([
      0, 0, 0, 4,
      0x08, 0x01, 0x02, 0x03,
      0, 0, 0, 6,
      0x08, 0x04, 0x03, 0x02, 0x01, 0x00
    ]),
    output,
    parsed;

  // capture the segment emitted when the stream is ended
  videoSegmentStream.on('data', function(bytes) {
    output = bytes;
  });

  videoSegmentStream.push({ data: firstNal });
  videoSegmentStream.push({ data: secondNal });
  videoSegmentStream.end();

  ok(output, 'generated a data event');

  parsed = videojs.inspectMp4(output);
  equal(parsed[1].byteLength,
        (4 + 4) + (4 + 6),
        'wrote the correct number of bytes');

  // skip the first box and the 8-byte header of the box that follows
  // it to reach the payload
  deepEqual(new Uint8Array(output.subarray(parsed[0].size + 8)),
            expectedPayload,
            'wrote an AVC stream into the mdat');
});
module('Transmuxer', {
setup: function() {
transmuxer = new Transmuxer();
......@@ -832,12 +931,17 @@ test('buffers video samples until ended', function() {
equal(boxes.length, 2, 'generated two boxes');
equal(boxes[0].type, 'moof', 'the first box is a moof');
equal(boxes[1].type, 'mdat', 'the second box is a mdat');
deepEqual(new Uint8Array(samples[0].data.subarray(samples[0].data.length - 10)),
deepEqual(new Uint8Array(samples[0].data.subarray(boxes[0].size + 8)),
new Uint8Array([
0, 0, 0, 2,
0x09, 0x01,
0, 0, 0, 2,
0x00, 0x02,
0, 0, 0, 2,
0x09, 0x03,
0, 0, 0, 2,
0x00, 0x04,
0, 0, 0, 2,
0x00, 0x05]),
'concatenated NALs into an mdat');
});
......@@ -873,8 +977,8 @@ validateTrack = function(track, metadata) {
equal(mdia.boxes[2].type, 'minf', 'wrote the media info');
};
validateTrackFragment = function(track, metadata) {
var tfhd, trun, sdtp, i, sample;
validateTrackFragment = function(track, segment, metadata) {
var tfhd, trun, sdtp, i, j, sample, nalUnitType;
equal(track.type, 'traf', 'wrote a track fragment');
equal(track.boxes.length, 4, 'wrote four track fragment children');
tfhd = track.boxes[0];
......@@ -884,18 +988,15 @@ validateTrackFragment = function(track, metadata) {
equal(track.boxes[1].type,
'tfdt',
'wrote a track fragment decode time box');
ok(track.boxes[1].baseMediaDecodeTime >= 0, 'base decode time is valid');
ok(track.boxes[1].baseMediaDecodeTime >= 0, 'base decode time is non-negative');
trun = track.boxes[2];
ok(trun.dataOffset >= 0, 'set data offset');
equal(trun.dataOffset,
trun.size +
16 + // mfhd size
8 + // moof header size
8, // mdat header size
'uses movie fragment relative addressing');
metadata.mdatOffset + 8,
'trun data offset is the size of the moof');
ok(trun.samples.length > 0, 'generated media samples');
for (i = 0; i < trun.samples.length; i++) {
for (i = 0, j = trun.dataOffset; i < trun.samples.length; i++) {
sample = trun.samples[i];
ok(sample.duration > 0, 'wrote a positive duration for sample ' + i);
ok(sample.size > 0, 'wrote a positive size for sample ' + i);
......@@ -903,11 +1004,17 @@ validateTrackFragment = function(track, metadata) {
'wrote a positive composition time offset for sample ' + i);
ok(sample.flags, 'wrote sample flags');
equal(sample.flags.isLeading, 0, 'the leading nature is unknown');
notEqual(sample.flags.dependsOn, 0, 'sample dependency is not unknown');
notEqual(sample.flags.dependsOn, 4, 'sample dependency is valid');
nalUnitType = segment[j + 4] & 0x1F;
equal(nalUnitType, 9, 'samples begin with an access_unit_delimiter_rbsp');
equal(sample.flags.isDependedOn, 0, 'dependency of other samples is unknown');
equal(sample.flags.hasRedundancy, 0, 'sample redundancy is unknown');
equal(sample.flags.degradationPriority, 0, 'sample degradation priority is zero');
j += sample.size; // advance to the next sample in the mdat
}
sdtp = track.boxes[3];
......@@ -970,12 +1077,13 @@ test('parses an example mp2t file and generates media segments', function() {
ok(mfhd.sequenceNumber < sequenceNumber, 'sequence numbers are increasing');
sequenceNumber = mfhd.sequenceNumber;
validateTrackFragment(boxes[i].boxes[1], {
equal(boxes[i + 1].type, 'mdat', 'second box is an mdat');
validateTrackFragment(boxes[i].boxes[1], segments[1].data, {
trackId: 256,
width: 388,
height: 300
height: 300,
mdatOffset: boxes[0].size
});
equal(boxes[i + 1].type, 'mdat', 'second box is an mdat');
}
});
......