237d9b4e by David LaPalomento

Do not re-add in-band cues when seeking or changing playlists

Previously, in-band metadata cues were added whenever they were encountered during segment parsing, so seeking within a stream would add the same cues again when their containing segment was re-buffered. Now, cues that occur after the current time are cleared on every seek, which allows them to be re-added without duplication when they are re-parsed. Cues before the current time are retained, since re-buffering would not recreate them. Cue point creation also now takes live stream segment expiration into account.
1 parent b27cedcc
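
Concretely, using the timeline from the new 'clears in-band cues ahead of current time on seek' test below (a worked example of the behavior, not additional code in this commit):

// cues exist at 9.9s (in segment 0) and 19.9s (in segment 1)
// seeking to 11s re-buffers segment 1, which starts at 10s:
//   - the 19.9s cue is removed now and re-added when segment 1 is re-parsed
//   - the 9.9s cue is retained because segment 0 is never re-downloaded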
@@ -173,13 +173,6 @@
 (tag.data[19]);
 }
 
-// adjust the PTS values to align with the video and audio
-// streams
-if (this.timestampOffset) {
-  tag.pts -= this.timestampOffset;
-  tag.dts -= this.timestampOffset;
-}
-
 // parse one or more ID3 frames
 // http://id3.org/id3v2.3.0#ID3v2_frame_overview
 do {
......
@@ -19,10 +19,7 @@
   streamBuffer = new Uint8Array(MP2T_PACKET_LENGTH),
   streamBufferByteCount = 0,
   h264Stream = new H264Stream(),
-  aacStream = new AacStream(),
-  h264HasTimeStampOffset = false,
-  aacHasTimeStampOffset = false,
-  timeStampOffset;
+  aacStream = new AacStream();
 
 // expose the stream metadata
 self.stream = {
@@ -34,6 +31,13 @@
 // allow in-band metadata to be observed
 self.metadataStream = new MetadataStream();
 
+// The first timestamp value encountered during parsing. This
+// value can be used to determine the relative timing between
+// frames and the start of the current timestamp sequence. It
+// should be reset to null before parsing a segment with
+// discontinuous timestamp values from previous segments.
+self.timestampOffset = null;
+
 // For information on the FLV format, see
 // http://download.macromedia.com/f4v/video_file_format_spec_v10_1.pdf.
 // Technically, this function returns the header and a metadata FLV tag
@@ -354,31 +358,18 @@
 // Skip past "optional" portion of PTS header
 offset += pesHeaderLength;
 
-// align the metadata stream PTS values with the start of
-// the other elementary streams
-if (!self.metadataStream.timestampOffset) {
-  self.metadataStream.timestampOffset = pts;
+// keep track of the earliest encountered PTS value so
+// external parties can align timestamps across
+// discontinuities
+if (self.timestampOffset === null) {
+  self.timestampOffset = pts;
 }
 
 if (pid === self.stream.programMapTable[STREAM_TYPES.h264]) {
-  if (!h264HasTimeStampOffset) {
-    h264HasTimeStampOffset = true;
-    if (timeStampOffset === undefined) {
-      timeStampOffset = pts;
-    }
-    h264Stream.setTimeStampOffset(timeStampOffset);
-  }
   h264Stream.setNextTimeStamp(pts,
                               dts,
                               dataAlignmentIndicator);
 } else if (pid === self.stream.programMapTable[STREAM_TYPES.adts]) {
-  if (!aacHasTimeStampOffset) {
-    aacHasTimeStampOffset = true;
-    if (timeStampOffset === undefined) {
-      timeStampOffset = pts;
-    }
-    aacStream.setTimeStampOffset(timeStampOffset);
-  }
   aacStream.setNextTimeStamp(pts,
                              pesPacketSize,
                              dataAlignmentIndicator);
......
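
With the per-stream offset bookkeeping removed above, the parser now just records the first PTS it encounters in self.timestampOffset and leaves alignment to consumers. A minimal sketch of the consumer-side conversion, with PTS values in milliseconds as in the tests below (parser and metadata are illustrative names):

// seconds between a metadata tag and the start of the current
// timestamp sequence
var seconds = (metadata.pts - parser.timestampOffset) * 0.001;
// e.g. a tag at PTS 15000 in a sequence that began at PTS 14000
// lands one second in: (15000 - 14000) * 0.001 === 1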
@@ -46,13 +46,6 @@ videojs.Hls = videojs.Flash.extend({
 // buffered data should be appended to the source buffer
 this.startCheckingBuffer_();
 
-// the earliest presentation timestamp (PTS) encountered since the
-// last #EXT-X-DISCONTINUITY. In a playlist without
-// discontinuities, this will be the PTS value for the first frame
-// in the video. PTS values are necessary to properly synchronize
-// playback when switching to a variant stream.
-this.lastStartingPts_ = undefined;
-
 videojs.Hls.prototype.src.call(this, options.source && options.source.src);
 }
 });
@@ -96,43 +89,7 @@ videojs.Hls.prototype.src = function(src) {
 
 // if the stream contains ID3 metadata, expose that as a metadata
 // text track
-(function() {
-  var
-    metadataStream = tech.segmentParser_.metadataStream,
-    textTrack;
-
-  // only expose metadata tracks to video.js versions that support
-  // dynamic text tracks (4.12+)
-  if (!tech.player().addTextTrack) {
-    return;
-  }
-
-  metadataStream.on('data', function(metadata) {
-    var i, cue, frame, time, hexDigit;
-
-    // create the metadata track if this is the first ID3 tag we've
-    // seen
-    if (!textTrack) {
-      textTrack = tech.player().addTextTrack('metadata', 'Timed Metadata');
-
-      // build the dispatch type from the stream descriptor
-      // https://html.spec.whatwg.org/multipage/embedded-content.html#steps-to-expose-a-media-resource-specific-text-track
-      textTrack.inBandMetadataTrackDispatchType = videojs.Hls.SegmentParser.STREAM_TYPES.metadata.toString(16).toUpperCase();
-      for (i = 0; i < metadataStream.descriptor.length; i++) {
-        hexDigit = ('00' + metadataStream.descriptor[i].toString(16).toUpperCase()).slice(-2);
-        textTrack.inBandMetadataTrackDispatchType += hexDigit;
-      }
-    }
-
-    for (i = 0; i < metadata.frames.length; i++) {
-      frame = metadata.frames[i];
-      time = metadata.pts / 1000;
-      cue = new window.VTTCue(time, time, frame.value || frame.url || '');
-      cue.frame = frame;
-      textTrack.addCue(cue);
-    }
-  });
-})();
+this.setupMetadataCueTranslation_();
 
 // load the MediaSource into the player
 this.mediaSource.addEventListener('sourceopen', videojs.bind(this, this.handleSourceOpen));
@@ -289,6 +246,78 @@ videojs.Hls.prototype.handleSourceOpen = function() {
 }
 };
 
+// register event listeners to transform in-band metadata events into
+// VTTCues on a text track
+videojs.Hls.prototype.setupMetadataCueTranslation_ = function() {
+  var
+    tech = this,
+    metadataStream = tech.segmentParser_.metadataStream,
+    textTrack;
+
+  // only expose metadata tracks to video.js versions that support
+  // dynamic text tracks (4.12+)
+  if (!tech.player().addTextTrack) {
+    return;
+  }
+
+  // add a metadata cue whenever a metadata event is triggered during
+  // segment parsing
+  metadataStream.on('data', function(metadata) {
+    var i, cue, frame, time, media, segmentOffset, hexDigit;
+
+    // create the metadata track if this is the first ID3 tag we've
+    // seen
+    if (!textTrack) {
+      textTrack = tech.player().addTextTrack('metadata', 'Timed Metadata');
+
+      // build the dispatch type from the stream descriptor
+      // https://html.spec.whatwg.org/multipage/embedded-content.html#steps-to-expose-a-media-resource-specific-text-track
+      textTrack.inBandMetadataTrackDispatchType = videojs.Hls.SegmentParser.STREAM_TYPES.metadata.toString(16).toUpperCase();
+      for (i = 0; i < metadataStream.descriptor.length; i++) {
+        hexDigit = ('00' + metadataStream.descriptor[i].toString(16).toUpperCase()).slice(-2);
+        textTrack.inBandMetadataTrackDispatchType += hexDigit;
+      }
+    }
+
+    // calculate the start time for the segment that is currently being parsed
+    media = tech.playlists.media();
+    segmentOffset = tech.playlists.expiredPreDiscontinuity_ + tech.playlists.expiredPostDiscontinuity_;
+    segmentOffset += videojs.Hls.Playlist.duration(media, media.mediaSequence, media.mediaSequence + tech.mediaIndex);
+
+    // create cue points for all the ID3 frames in this metadata event
+    for (i = 0; i < metadata.frames.length; i++) {
+      frame = metadata.frames[i];
+      time = segmentOffset + ((metadata.pts - tech.segmentParser_.timestampOffset) * 0.001);
+      cue = new window.VTTCue(time, time, frame.value || frame.url || '');
+      cue.frame = frame;
+      textTrack.addCue(cue);
+    }
+  });
+
+  // when seeking, clear out all cues ahead of the earliest position
+  // in the new segment. keep earlier cues around so they can still be
+  // programmatically inspected even though they've already fired
+  tech.on('seeking', function() {
+    var media, startTime, i;
+    if (!textTrack) {
+      return;
+    }
+    media = tech.playlists.media();
+    startTime = tech.playlists.expiredPreDiscontinuity_ + tech.playlists.expiredPostDiscontinuity_;
+    startTime += videojs.Hls.Playlist.duration(media, media.mediaSequence, media.mediaSequence + tech.mediaIndex);
+
+    i = textTrack.cues.length;
+    while (i--) {
+      if (textTrack.cues[i].startTime < startTime) {
+        // cues are sorted by start time, earliest first, so all the
+        // rest of the cues are from earlier segments
+        break;
+      }
+      textTrack.removeCue(textTrack.cues[i]);
+    }
+  });
+};
+
 /**
  * Reset the mediaIndex if play() is called after the video has
  * ended.
@@ -815,6 +844,13 @@ videojs.Hls.prototype.drainBuffer = function(event) {
 segmentOffset += videojs.Hls.Playlist.duration(playlist, playlist.mediaSequence, playlist.mediaSequence + mediaIndex);
 segmentOffset *= 1000;
 
+// if this segment is the start of a new discontinuity
+// sequence, the segment parser's timestamp offset must be
+// re-calculated
+if (segment.discontinuity) {
+  this.segmentParser_.timestampOffset = null;
+}
+
 // transmux the segment data from MP2T to FLV
 this.segmentParser_.parseSegmentBinaryData(bytes);
 this.segmentParser_.flushTags();
......
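
Because timestampOffset is reset to null at every discontinuity, PTS jumps between discontinuity sequences never leak into cue times. The arithmetic from the new 'translates ID3 PTS values across discontinuities' test below works out as follows (a worked example, assuming no expired live content):

// segment 0: first PTS 14000, cue PTS 15000
//   time = 0 + (15000 - 14000) * 0.001 = 1 second
// segment 1: after #EXT-X-DISCONTINUITY, the offset is re-captured at
// PTS 22000 and segment 0 contributes 10 seconds of segmentOffset:
//   time = 10 + (23000 - 22000) * 0.001 = 11 seconds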
@@ -186,28 +186,6 @@
 // too large/small tag size values
 // too large/small frame size values
 
-test('translates PTS and DTS values based on the timestamp offset', function() {
-  var events = [];
-  metadataStream.on('data', function(event) {
-    events.push(event);
-  });
-
-  metadataStream.timestampOffset = 800;
-
-  metadataStream.push({
-    trackId: 7,
-    pts: 1000,
-    dts: 900,
-
-    // header
-    data: new Uint8Array(id3Tag(id3Frame('XFFF', [0]), [0x00, 0x00]))
-  });
-
-  equal(events.length, 1, 'emitted an event');
-  equal(events[0].pts, 200, 'translated pts');
-  equal(events[0].dts, 100, 'translated dts');
-});
-
 test('parses TXXX frames', function() {
   var events = [];
   metadataStream.on('data', function(event) {
......
@@ -15,7 +15,7 @@
 <script src="../libs/qunit/qunit.js"></script>
 
 <!-- video.js -->
-<script src="../node_modules/video.js/dist/video-js/video.js"></script>
+<script src="../node_modules/video.js/dist/video-js/video.dev.js"></script>
 <script src="../node_modules/videojs-contrib-media-sources/src/videojs-media-sources.js"></script>
 
 <!-- HLS plugin -->
......
@@ -1249,6 +1249,218 @@ test('exposes in-band metadata events as cues', function() {
     'set the private data');
 });
 
+test('only adds in-band cues the first time they are encountered', function() {
+  var tags = [{ pts: 0, bytes: new Uint8Array(1) }], track;
+  player.src({
+    src: 'manifest/media.m3u8',
+    type: 'application/vnd.apple.mpegurl'
+  });
+  openMediaSource(player);
+
+  player.hls.segmentParser_.getNextTag = function() {
+    return tags.shift();
+  };
+  player.hls.segmentParser_.tagsAvailable = function() {
+    return tags.length;
+  };
+  player.hls.segmentParser_.parseSegmentBinaryData = function() {
+    // fake out a descriptor
+    player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
+      1, 2, 3, 0xbb
+    ]);
+    // trigger a metadata event
+    player.hls.segmentParser_.metadataStream.trigger('data', {
+      pts: 2000,
+      data: new Uint8Array([]),
+      frames: [{
+        id: 'TXXX',
+        value: 'cue text'
+      }]
+    });
+  };
+  standardXHRResponse(requests.shift());
+  standardXHRResponse(requests.shift());
+  // seek back to the first segment
+  player.currentTime(0);
+  player.hls.trigger('seeking');
+  tags.push({ pts: 0, bytes: new Uint8Array(1) });
+  standardXHRResponse(requests.shift());
+
+  track = player.textTracks()[0];
+  equal(track.cues.length, 1, 'only added the cue once');
+});
+
+test('clears in-band cues ahead of current time on seek', function() {
+  var
+    tags = [],
+    events = [],
+    track;
+  player.src({
+    src: 'manifest/media.m3u8',
+    type: 'application/vnd.apple.mpegurl'
+  });
+  openMediaSource(player);
+
+  player.hls.segmentParser_.getNextTag = function() {
+    return tags.shift();
+  };
+  player.hls.segmentParser_.tagsAvailable = function() {
+    return tags.length;
+  };
+  player.hls.segmentParser_.parseSegmentBinaryData = function() {
+    // fake out a descriptor
+    player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
+      1, 2, 3, 0xbb
+    ]);
+    // trigger a metadata event
+    if (events.length) {
+      player.hls.segmentParser_.metadataStream.trigger('data', events.shift());
+    }
+  };
+  standardXHRResponse(requests.shift()); // media
+  tags.push({ pts: 10 * 1000, bytes: new Uint8Array(1) });
+  events.push({
+    pts: 9.9 * 1000,
+    data: new Uint8Array([]),
+    frames: [{
+      id: 'TXXX',
+      value: 'cue 1'
+    }]
+  });
+  standardXHRResponse(requests.shift()); // segment 0
+  tags.push({ pts: 20 * 1000, bytes: new Uint8Array(1) });
+  events.push({
+    pts: 19.9 * 1000,
+    data: new Uint8Array([]),
+    frames: [{
+      id: 'TXXX',
+      value: 'cue 2'
+    }]
+  });
+  player.hls.checkBuffer_();
+  standardXHRResponse(requests.shift()); // segment 1
+
+  track = player.textTracks()[0];
+  equal(track.cues.length, 2, 'added the cues');
+
+  // seek into segment 1
+  player.currentTime(11);
+  player.hls.trigger('seeking');
+  equal(track.cues.length, 1, 'removed a cue');
+  equal(track.cues[0].startTime, 9.9, 'retained the earlier cue');
+});
+
+test('translates ID3 PTS values to cue media timeline positions', function() {
+  var tags = [{ pts: 4 * 1000, bytes: new Uint8Array(1) }], track;
+  player.src({
+    src: 'manifest/media.m3u8',
+    type: 'application/vnd.apple.mpegurl'
+  });
+  openMediaSource(player);
+
+  player.hls.segmentParser_.getNextTag = function() {
+    return tags.shift();
+  };
+  player.hls.segmentParser_.tagsAvailable = function() {
+    return tags.length;
+  };
+  player.hls.segmentParser_.parseSegmentBinaryData = function() {
+    // setup the timestamp offset
+    this.timestampOffset = tags[0].pts;
+
+    // fake out a descriptor
+    player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
+      1, 2, 3, 0xbb
+    ]);
+    // trigger a metadata event
+    player.hls.segmentParser_.metadataStream.trigger('data', {
+      pts: 5 * 1000,
+      data: new Uint8Array([]),
+      frames: [{
+        id: 'TXXX',
+        value: 'cue text'
+      }]
+    });
+  };
+  standardXHRResponse(requests.shift()); // media
+  standardXHRResponse(requests.shift()); // segment 0
+
+  track = player.textTracks()[0];
+  equal(track.cues[0].startTime, 1, 'translated startTime');
+  equal(track.cues[0].endTime, 1, 'translated endTime');
+});
+
+test('translates ID3 PTS values across discontinuities', function() {
+  var tags = [], events = [], track;
+  player.src({
+    src: 'cues-and-discontinuities.m3u8',
+    type: 'application/vnd.apple.mpegurl'
+  });
+  openMediaSource(player);
+
+  player.hls.segmentParser_.getNextTag = function() {
+    return tags.shift();
+  };
+  player.hls.segmentParser_.tagsAvailable = function() {
+    return tags.length;
+  };
+  player.hls.segmentParser_.parseSegmentBinaryData = function() {
+    if (this.timestampOffset === null) {
+      this.timestampOffset = tags[0].pts;
+    }
+    // fake out a descriptor
+    player.hls.segmentParser_.metadataStream.descriptor = new Uint8Array([
+      1, 2, 3, 0xbb
+    ]);
+    // trigger a metadata event
+    if (events.length) {
+      player.hls.segmentParser_.metadataStream.trigger('data', events.shift());
+    }
+  };
+
+  // media playlist
+  requests.shift().respond(200, null,
+                           '#EXTM3U\n' +
+                           '#EXTINF:10,\n' +
+                           '0.ts\n' +
+                           '#EXT-X-DISCONTINUITY\n' +
+                           '#EXTINF:10,\n' +
+                           '1.ts\n');
+
+  // segment 0 starts at PTS 14000 and has a cue point at 15000
+  tags.push({ pts: 14 * 1000, bytes: new Uint8Array(1) });
+  events.push({
+    pts: 15 * 1000,
+    data: new Uint8Array([]),
+    frames: [{
+      id: 'TXXX',
+      value: 'cue 0'
+    }]
+  });
+  standardXHRResponse(requests.shift()); // segment 0
+
+  // segment 1 is after a discontinuity, starts at PTS 22000
+  // and has a cue point at 23000
+  tags.push({ pts: 22 * 1000, bytes: new Uint8Array(1) });
+  events.push({
+    pts: 23 * 1000,
+    data: new Uint8Array([]),
+    frames: [{
+      id: 'TXXX',
+      value: 'cue 0'
+    }]
+  });
+  player.hls.checkBuffer_();
+  standardXHRResponse(requests.shift());
+
+  track = player.textTracks()[0];
+  equal(track.cues.length, 2, 'created cues');
+  equal(track.cues[0].startTime, 1, 'first cue started at the correct time');
+  equal(track.cues[0].endTime, 1, 'first cue ended at the correct time');
+  equal(track.cues[1].startTime, 11, 'second cue started at the correct time');
+  equal(track.cues[1].endTime, 11, 'second cue ended at the correct time');
+});
+
 test('drops tags before the target timestamp when seeking', function() {
   var i = 10,
     tags = [],
......