237d9b4e by David LaPalomento

Do not re-add in-band cues when seeking or changing playlists

Previously, in-band metadata cues were added whenever they were encountered during segment parsing. Seeking within a stream would cause the same cues to be added multiple times when their containing segment was re-buffered. Now, cues that occur after the current time are cleared on every seek, which allows them to be re-added without duplication after they are re-parsed. Cues before the current time are retained because re-buffering would not recreate them. Cue point creation has also been adjusted to take live stream segment expiration into account.
1 parent b27cedcc
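In rough terms, the cue timing math this commit introduces places each ID3 frame on the media timeline by combining three pieces: the duration of segments that have expired from a live playlist, the duration of the segments that precede the one being parsed, and the frame's PTS relative to the first PTS seen in the current timestamp sequence. A minimal sketch of that calculation, using the names from the diff below (PTS values are assumed to be in milliseconds, cue times in seconds):

// sketch only: mirrors the cue-time translation added in
// setupMetadataCueTranslation_ below
var media = tech.playlists.media();
// duration of segments that have slid out of a live playlist
var expired = tech.playlists.expiredPreDiscontinuity_ +
              tech.playlists.expiredPostDiscontinuity_;
// duration of the segments before the one currently being parsed
var segmentOffset = expired +
    videojs.Hls.Playlist.duration(media,
                                  media.mediaSequence,
                                  media.mediaSequence + tech.mediaIndex);
// the frame's position inside the segment, relative to the first
// PTS of the current timestamp sequence
var cueTime = segmentOffset +
    ((metadata.pts - tech.segmentParser_.timestampOffset) * 0.001);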
......@@ -173,13 +173,6 @@
(tag.data[19]);
}
// adjust the PTS values to align with the video and audio
// streams
if (this.timestampOffset) {
tag.pts -= this.timestampOffset;
tag.dts -= this.timestampOffset;
}
// parse one or more ID3 frames
// http://id3.org/id3v2.3.0#ID3v2_frame_overview
do {
......
......@@ -19,10 +19,7 @@
streamBuffer = new Uint8Array(MP2T_PACKET_LENGTH),
streamBufferByteCount = 0,
h264Stream = new H264Stream(),
aacStream = new AacStream(),
h264HasTimeStampOffset = false,
aacHasTimeStampOffset = false,
timeStampOffset;
aacStream = new AacStream();
// expose the stream metadata
self.stream = {
......@@ -34,6 +31,13 @@
// allow in-band metadata to be observed
self.metadataStream = new MetadataStream();
// The first timestamp value encountered during parsing. This
// value can be used to determine the relative timing between
// frames and the start of the current timestamp sequence. It
// should be reset to null before parsing a segment whose
// timestamps are discontinuous with the previous segment's.
self.timestampOffset = null;
// For information on the FLV format, see
// http://download.macromedia.com/f4v/video_file_format_spec_v10_1.pdf.
// Technically, this function returns the header and a metadata FLV tag
......@@ -354,31 +358,18 @@
// Skip past "optional" portion of PTS header
offset += pesHeaderLength;
// align the metadata stream PTS values with the start of
// the other elementary streams
if (!self.metadataStream.timestampOffset) {
self.metadataStream.timestampOffset = pts;
// keep track of the earliest encountered PTS value so
// external parties can align timestamps across
// discontinuities
if (self.timestampOffset === null) {
self.timestampOffset = pts;
}
if (pid === self.stream.programMapTable[STREAM_TYPES.h264]) {
if (!h264HasTimeStampOffset) {
h264HasTimeStampOffset = true;
if (timeStampOffset === undefined) {
timeStampOffset = pts;
}
h264Stream.setTimeStampOffset(timeStampOffset);
}
h264Stream.setNextTimeStamp(pts,
dts,
dataAlignmentIndicator);
} else if (pid === self.stream.programMapTable[STREAM_TYPES.adts]) {
if (!aacHasTimeStampOffset) {
aacHasTimeStampOffset = true;
if (timeStampOffset === undefined) {
timeStampOffset = pts;
}
aacStream.setTimeStampOffset(timeStampOffset);
}
aacStream.setNextTimeStamp(pts,
pesPacketSize,
dataAlignmentIndicator);
......
......@@ -46,13 +46,6 @@ videojs.Hls = videojs.Flash.extend({
// buffered data should be appended to the source buffer
this.startCheckingBuffer_();
// the earliest presentation timestamp (PTS) encountered since the
// last #EXT-X-DISCONTINUITY. In a playlist without
// discontinuities, this will be the PTS value for the first frame
// in the video. PTS values are necessary to properly synchronize
// playback when switching to a variant stream.
this.lastStartingPts_ = undefined;
videojs.Hls.prototype.src.call(this, options.source && options.source.src);
}
});
......@@ -96,43 +89,7 @@ videojs.Hls.prototype.src = function(src) {
// if the stream contains ID3 metadata, expose that as a metadata
// text track
(function() {
var
metadataStream = tech.segmentParser_.metadataStream,
textTrack;
// only expose metadata tracks to video.js versions that support
// dynamic text tracks (4.12+)
if (!tech.player().addTextTrack) {
return;
}
metadataStream.on('data', function(metadata) {
var i, cue, frame, time, hexDigit;
// create the metadata track if this is the first ID3 tag we've
// seen
if (!textTrack) {
textTrack = tech.player().addTextTrack('metadata', 'Timed Metadata');
// build the dispatch type from the stream descriptor
// https://html.spec.whatwg.org/multipage/embedded-content.html#steps-to-expose-a-media-resource-specific-text-track
textTrack.inBandMetadataTrackDispatchType = videojs.Hls.SegmentParser.STREAM_TYPES.metadata.toString(16).toUpperCase();
for (i = 0; i < metadataStream.descriptor.length; i++) {
hexDigit = ('00' + metadataStream.descriptor[i].toString(16).toUpperCase()).slice(-2);
textTrack.inBandMetadataTrackDispatchType += hexDigit;
}
}
for (i = 0; i < metadata.frames.length; i++) {
frame = metadata.frames[i];
time = metadata.pts / 1000;
cue = new window.VTTCue(time, time, frame.value || frame.url || '');
cue.frame = frame;
textTrack.addCue(cue);
}
});
})();
this.setupMetadataCueTranslation_();
// load the MediaSource into the player
this.mediaSource.addEventListener('sourceopen', videojs.bind(this, this.handleSourceOpen));
......@@ -289,6 +246,78 @@ videojs.Hls.prototype.handleSourceOpen = function() {
}
};
// register event listeners to transform in-band metadata events into
// VTTCues on a text track
videojs.Hls.prototype.setupMetadataCueTranslation_ = function() {
var
tech = this,
metadataStream = tech.segmentParser_.metadataStream,
textTrack;
// only expose metadata tracks to video.js versions that support
// dynamic text tracks (4.12+)
if (!tech.player().addTextTrack) {
return;
}
// add a metadata cue whenever a metadata event is triggered during
// segment parsing
metadataStream.on('data', function(metadata) {
var i, cue, frame, time, media, segmentOffset, hexDigit;
// create the metadata track if this is the first ID3 tag we've
// seen
if (!textTrack) {
textTrack = tech.player().addTextTrack('metadata', 'Timed Metadata');
// build the dispatch type from the stream descriptor
// https://html.spec.whatwg.org/multipage/embedded-content.html#steps-to-expose-a-media-resource-specific-text-track
textTrack.inBandMetadataTrackDispatchType = videojs.Hls.SegmentParser.STREAM_TYPES.metadata.toString(16).toUpperCase();
for (i = 0; i < metadataStream.descriptor.length; i++) {
hexDigit = ('00' + metadataStream.descriptor[i].toString(16).toUpperCase()).slice(-2);
textTrack.inBandMetadataTrackDispatchType += hexDigit;
}
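// (for example, assuming STREAM_TYPES.metadata is 0x15, the PES
// metadata stream type, a descriptor of [1, 2, 3, 0xbb] would
// produce the dispatch type '15010203BB')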
}
// calculate the start time for the segment that is currently being parsed
media = tech.playlists.media();
segmentOffset = tech.playlists.expiredPreDiscontinuity_ + tech.playlists.expiredPostDiscontinuity_;
segmentOffset += videojs.Hls.Playlist.duration(media, media.mediaSequence, media.mediaSequence + tech.mediaIndex);
// create cue points for all the ID3 frames in this metadata event
for (i = 0; i < metadata.frames.length; i++) {
frame = metadata.frames[i];
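// metadata.pts and timestampOffset are in milliseconds; cue
// times are in seconds, hence the 0.001 factor below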
time = segmentOffset + ((metadata.pts - tech.segmentParser_.timestampOffset) * 0.001);
cue = new window.VTTCue(time, time, frame.value || frame.url || '');
cue.frame = frame;
textTrack.addCue(cue);
}
});
// when seeking, clear out all cues ahead of the earliest position
// in the new segment. keep earlier cues around so they can still be
// programmatically inspected even though they've already fired
tech.on('seeking', function() {
if (!textTrack) {
return;
}
var media = tech.playlists.media(), i;
var startTime = tech.playlists.expiredPreDiscontinuity_ + tech.playlists.expiredPostDiscontinuity_;
startTime += videojs.Hls.Playlist.duration(media, media.mediaSequence, media.mediaSequence + tech.mediaIndex);
i = textTrack.cues.length;
while (i--) {
if (textTrack.cues[i].startTime < startTime) {
// cues are sorted by start time, earliest first, so all the
// rest of the cues are from earlier segments
break;
}
textTrack.removeCue(textTrack.cues[i]);
}
});
};
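// example usage (illustrative sketch, assuming the metadata track is
// the first text track): the cues created above can be observed
// through the standard text track API, e.g.
//
//   player.textTracks()[0].oncuechange = function() {
//     var i, cues = this.activeCues;
//     for (i = 0; i < cues.length; i++) {
//       console.log(cues[i].startTime, cues[i].text, cues[i].frame);
//     }
//   };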
/**
* Reset the mediaIndex if play() is called after the video has
* ended.
......@@ -815,6 +844,13 @@ videojs.Hls.prototype.drainBuffer = function(event) {
segmentOffset += videojs.Hls.Playlist.duration(playlist, playlist.mediaSequence, playlist.mediaSequence + mediaIndex);
segmentOffset *= 1000;
// if this segment is the start of a new discontinuity
// sequence, the segment parser's timestamp offset must be
// recalculated
if (segment.discontinuity) {
this.segmentParser_.timestampOffset = null;
}
// transmux the segment data from MP2T to FLV
this.segmentParser_.parseSegmentBinaryData(bytes);
this.segmentParser_.flushTags();
......
......@@ -186,28 +186,6 @@
// too large/small tag size values
// too large/small frame size values
test('translates PTS and DTS values based on the timestamp offset', function() {
var events = [];
metadataStream.on('data', function(event) {
events.push(event);
});
metadataStream.timestampOffset = 800;
metadataStream.push({
trackId: 7,
pts: 1000,
dts: 900,
// header
data: new Uint8Array(id3Tag(id3Frame('XFFF', [0]), [0x00, 0x00]))
});
equal(events.length, 1, 'emitted an event');
equal(events[0].pts, 200, 'translated pts');
equal(events[0].dts, 100, 'translated dts');
});
test('parses TXXX frames', function() {
var events = [];
metadataStream.on('data', function(event) {
......
......@@ -15,7 +15,7 @@
<script src="../libs/qunit/qunit.js"></script>
<!-- video.js -->
<script src="../node_modules/video.js/dist/video-js/video.js"></script>
<script src="../node_modules/video.js/dist/video-js/video.dev.js"></script>
<script src="../node_modules/videojs-contrib-media-sources/src/videojs-media-sources.js"></script>
<!-- HLS plugin -->
......
......@@ -1249,6 +1249,218 @@ test('exposes in-band metadata events as cues', function() {
'set the private data');
});
test('only adds in-band cues the first time they are encountered', function() {
var tags = [{ pts: 0, bytes: new Uint8Array(1) }], track;
player.src({
src: 'manifest/media.m3u8',
type: 'application/vnd.apple.mpegurl'
});
openMediaSource(player);
player.hls.segmentParser_.getNextTag = function() {
return tags.shift();
};
player.hls.segmentParser_.tagsAvailable = function() {
return tags.length;
};
player.hls.segmentParser_.parseSegmentBinaryData = function() {
// fake out a descriptor
player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
1, 2, 3, 0xbb
]);
// trigger a metadata event
player.hls.segmentParser_.metadataStream.trigger('data', {
pts: 2000,
data: new Uint8Array([]),
frames: [{
id: 'TXXX',
value: 'cue text'
}]
});
};
standardXHRResponse(requests.shift());
standardXHRResponse(requests.shift());
// seek back to the first segment
player.currentTime(0);
player.hls.trigger('seeking');
tags.push({ pts: 0, bytes: new Uint8Array(1) });
standardXHRResponse(requests.shift());
track = player.textTracks()[0];
equal(track.cues.length, 1, 'only added the cue once');
});
test('clears in-band cues ahead of current time on seek', function() {
var
tags = [],
events = [],
track;
player.src({
src: 'manifest/media.m3u8',
type: 'application/vnd.apple.mpegurl'
});
openMediaSource(player);
player.hls.segmentParser_.getNextTag = function() {
return tags.shift();
};
player.hls.segmentParser_.tagsAvailable = function() {
return tags.length;
};
player.hls.segmentParser_.parseSegmentBinaryData = function() {
// fake out a descriptor
player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
1, 2, 3, 0xbb
]);
// trigger a metadata event
if (events.length) {
player.hls.segmentParser_.metadataStream.trigger('data', events.shift());
}
};
standardXHRResponse(requests.shift()); // media
tags.push({ pts: 10 * 1000, bytes: new Uint8Array(1) });
events.push({
pts: 9.9 * 1000,
data: new Uint8Array([]),
frames: [{
id: 'TXXX',
value: 'cue 1'
}]
});
standardXHRResponse(requests.shift()); // segment 0
tags.push({ pts: 20 * 1000, bytes: new Uint8Array(1) });
events.push({
pts: 19.9 * 1000,
data: new Uint8Array([]),
frames: [{
id: 'TXXX',
value: 'cue 2'
}]
});
player.hls.checkBuffer_();
standardXHRResponse(requests.shift()); // segment 1
track = player.textTracks()[0];
equal(track.cues.length, 2, 'added the cues');
// seek into segment 1
player.currentTime(11);
player.hls.trigger('seeking');
equal(track.cues.length, 1, 'removed a cue');
equal(track.cues[0].startTime, 9.9, 'retained the earlier cue');
});
test('translates ID3 PTS values to cue media timeline positions', function() {
var tags = [{ pts: 4 * 1000, bytes: new Uint8Array(1) }], track;
player.src({
src: 'manifest/media.m3u8',
type: 'application/vnd.apple.mpegurl'
});
openMediaSource(player);
player.hls.segmentParser_.getNextTag = function() {
return tags.shift();
};
player.hls.segmentParser_.tagsAvailable = function() {
return tags.length;
};
player.hls.segmentParser_.parseSegmentBinaryData = function() {
// setup the timestamp offset
this.timestampOffset = tags[0].pts;
// fake out a descriptor
player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
1, 2, 3, 0xbb
]);
// trigger a metadata event
player.hls.segmentParser_.metadataStream.trigger('data', {
pts: 5 * 1000,
data: new Uint8Array([]),
frames: [{
id: 'TXXX',
value: 'cue text'
}]
});
};
standardXHRResponse(requests.shift()); // media
standardXHRResponse(requests.shift()); // segment 0
track = player.textTracks()[0];
equal(track.cues[0].startTime, 1, 'translated startTime');
equal(track.cues[0].endTime, 1, 'translated endTime');
});
test('translates ID3 PTS values across discontinuities', function() {
var tags = [], events = [], track;
player.src({
src: 'cues-and-discontinuities.m3u8',
type: 'application/vnd.apple.mpegurl'
});
openMediaSource(player);
player.hls.segmentParser_.getNextTag = function() {
return tags.shift();
};
player.hls.segmentParser_.tagsAvailable = function() {
return tags.length;
};
player.hls.segmentParser_.parseSegmentBinaryData = function() {
if (this.timestampOffset === null) {
this.timestampOffset = tags[0].pts;
}
// fake out a descriptor
player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
1, 2, 3, 0xbb
]);
// trigger a metadata event
if (events.length) {
player.hls.segmentParser_.metadataStream.trigger('data', events.shift());
}
};
// media playlist
requests.shift().respond(200, null,
'#EXTM3U\n' +
'#EXTINF:10,\n' +
'0.ts\n' +
'#EXT-X-DISCONTINUITY\n' +
'#EXTINF:10,\n' +
'1.ts\n');
// segment 0 starts at PTS 14000 and has a cue point at 15000
tags.push({ pts: 14 * 1000, bytes: new Uint8Array(1) });
events.push({
pts: 15 * 1000,
data: new Uint8Array([]),
frames: [{
id: 'TXXX',
value: 'cue 0'
}]
});
standardXHRResponse(requests.shift()); // segment 0
// segment 1 is after a discontinuity, starts at PTS 22000
// and has a cue point at 23000
tags.push({ pts: 22 * 1000, bytes: new Uint8Array(1) });
events.push({
pts: 23 * 1000,
data: new Uint8Array([]),
frames: [{
id: 'TXXX',
value: 'cue 0'
}]
});
player.hls.checkBuffer_();
standardXHRResponse(requests.shift());
track = player.textTracks()[0];
equal(track.cues.length, 2, 'created cues');
equal(track.cues[0].startTime, 1, 'first cue started at the correct time');
equal(track.cues[0].endTime, 1, 'first cue ended at the correct time');
equal(track.cues[1].startTime, 11, 'second cue started at the correct time');
equal(track.cues[1].endTime, 11, 'second cue ended at the correct time');
});
test('drops tags before the target timestamp when seeking', function() {
var i = 10,
tags = [],
......