fabd2276 by David LaPalomento

Merge pull request #262 from videojs/id3-enhancements

Id3 enhancements
2 parents 1811ffc3 fd6c3d02
......@@ -166,12 +166,19 @@ embedded as [ID3 tags](http://id3.org/id3v2.3.0). When a stream is
encountered with embedded metadata, an [in-band metadata text
track](https://html.spec.whatwg.org/multipage/embedded-content.html#text-track-in-band-metadata-track-dispatch-type)
will automatically be created and populated with cues as they are
encountered in the stream. Only UTF-8 encoded
encountered in the stream. UTF-8 encoded
[TXXX](http://id3.org/id3v2.3.0#User_defined_text_information_frame)
and [WXXX](http://id3.org/id3v2.3.0#User_defined_URL_link_frame) ID3
frames are currently mapped to cue points. There are lots of guides
and references to using text tracks [around the
web](http://www.html5rocks.com/en/tutorials/track/basics/).
frames are mapped to cue points and their values are set as the cue
text. Cues are also created for all other frame types, and the frame
data is attached to the generated cue:
```js
cue.frame.data
```
There are lots of guides and references to using text tracks [around
the web](http://www.html5rocks.com/en/tutorials/track/basics/).
### Testing
......
......@@ -6,12 +6,24 @@
(function(window, videojs, undefined) {
'use strict';
var
parseString = function(bytes, start, end) {
// return a percent-encoded representation of the specified byte range
// @see http://en.wikipedia.org/wiki/Percent-encoding
percentEncode = function(bytes, start, end) {
var i, result = '';
for (i = start; i < end; i++) {
result += '%' + ('00' + bytes[i].toString(16)).slice(-2);
}
return window.decodeURIComponent(result);
return result;
},
// return the string representation of the specified byte range,
// interpreted as UTf-8.
parseUtf8 = function(bytes, start, end) {
return window.decodeURIComponent(percentEncode(bytes, start, end));
},
// return the string representation of the specified byte range,
// interpreted as ISO-8859-1.
parseIso88591 = function(bytes, start, end) {
return window.unescape(percentEncode(bytes, start, end));
},
tagParsers = {
'TXXX': function(tag) {
......@@ -24,8 +36,8 @@
for (i = 1; i < tag.data.length; i++) {
if (tag.data[i] === 0) {
// parse the text fields
tag.description = parseString(tag.data, 1, i);
tag.value = parseString(tag.data, i + 1, tag.data.length);
tag.description = parseUtf8(tag.data, 1, i);
tag.value = parseUtf8(tag.data, i + 1, tag.data.length);
break;
}
}
......@@ -40,24 +52,45 @@
for (i = 1; i < tag.data.length; i++) {
if (tag.data[i] === 0) {
// parse the description and URL fields
tag.description = parseString(tag.data, 1, i);
tag.url = parseString(tag.data, i + 1, tag.data.length);
tag.description = parseUtf8(tag.data, 1, i);
tag.url = parseUtf8(tag.data, i + 1, tag.data.length);
break;
}
}
},
'PRIV': function(tag) {
var i;
for (i = 0; i < tag.data.length; i++) {
if (tag.data[i] === 0) {
// parse the description and URL fields
tag.owner = parseIso88591(tag.data, 0, i);
break;
}
}
tag.privateData = tag.data.subarray(i + 1);
}
},
MetadataStream;
MetadataStream = function(options) {
var settings = {
var
settings = {
debug: !!(options && options.debug),
// the bytes of the program-level descriptor field in MP2T
// see ISO/IEC 13818-1:2013 (E), section 2.6 "Program and
// program element descriptors"
descriptor: options && options.descriptor
}, i;
},
// the total size in bytes of the ID3 tag being parsed
tagSize = 0,
// tag data that is not complete enough to be parsed
buffer = [],
// the total number of bytes currently in the buffer
bufferSize = 0,
i;
MetadataStream.prototype.init.call(this);
// calculate the text track in-band metadata track dispatch type
......@@ -70,73 +103,111 @@
}
// Collect metadata packets until a complete ID3 tag has arrived, then
// parse the tag's frames and emit the tag through a 'data' event.
//
// chunk - an object with `data` (a Uint8Array of packet bytes) and the
//         `pts` / `dts` values associated with the packet.
//
// While nothing is buffered, a packet must be at least 10 bytes long and
// start with "ID3" or it is dropped. Later packets are appended until the
// number of buffered bytes reaches the size announced in the tag header.
//
// The emitted tag has the shape { data, frames, pts, dts }, where pts/dts
// come from the first packet of the tag (shifted by `this.timestampOffset`
// when it is set) and each frame is { id, data } plus whatever fields the
// matching entry in `tagParsers` adds.
this.push = function(chunk) {
  var tag, frameStart, frameSize, frame, i;

  // ignore events that don't look like ID3 data
  if (buffer.length === 0 &&
      (chunk.data.length < 10 ||
       chunk.data[0] !== 'I'.charCodeAt(0) ||
       chunk.data[1] !== 'D'.charCodeAt(0) ||
       chunk.data[2] !== '3'.charCodeAt(0))) {
    if (settings.debug) {
      videojs.log('Skipping unrecognized metadata packet');
    }
    return;
  }

  // add this chunk to the data we've collected so far
  buffer.push(chunk);
  bufferSize += chunk.data.byteLength;

  // grab the size of the entire tag from the ID3 header
  if (buffer.length === 1) {
    // the tag size is transmitted as a 28-bit integer in the
    // last four bytes of the ID3 header.
    // The most significant bit of each byte is dropped and the
    // results concatenated to recover the actual value.
    tagSize = (chunk.data[6] << 21) |
              (chunk.data[7] << 14) |
              (chunk.data[8] << 7) |
              (chunk.data[9]);

    // ID3 reports the tag size excluding the header but it's more
    // convenient for our comparisons to include it
    tagSize += 10;
  }

  // if the entire tag has not arrived, wait for more data
  if (bufferSize < tagSize) {
    return;
  }

  // collect the entire tag so it can be parsed
  tag = {
    data: new Uint8Array(tagSize),
    frames: [],
    pts: buffer[0].pts,
    dts: buffer[0].dts
  };
  for (i = 0; i < tagSize;) {
    // A buffered chunk may extend past the end of the tag. Copy only the
    // bytes that belong to this tag: handing `set` a source that does not
    // fit in the remaining space makes it throw a RangeError.
    tag.data.set(buffer[0].data.subarray(0, tagSize - i), i);
    i += buffer[0].data.byteLength;
    bufferSize -= buffer[0].data.byteLength;
    buffer.shift();
  }

  // find the start of the first frame and the end of the tag
  frameStart = 10;
  if (tag.data[5] & 0x40) {
    // advance the frame start past the extended header
    frameStart += 4; // header size field
    frameStart += (tag.data[10] << 24) |
                  (tag.data[11] << 16) |
                  (tag.data[12] << 8) |
                  (tag.data[13]);

    // clip any padding off the end
    tagSize -= (tag.data[16] << 24) |
               (tag.data[17] << 16) |
               (tag.data[18] << 8) |
               (tag.data[19]);
  }

  // adjust the PTS values to align with the video and audio
  // streams
  if (this.timestampOffset) {
    tag.pts -= this.timestampOffset;
    tag.dts -= this.timestampOffset;
  }

  // parse one or more ID3 frames
  // http://id3.org/id3v2.3.0#ID3v2_frame_overview
  do {
    // determine the number of bytes in this frame
    frameSize = (tag.data[frameStart + 4] << 24) |
                (tag.data[frameStart + 5] << 16) |
                (tag.data[frameStart + 6] << 8) |
                (tag.data[frameStart + 7]);
    if (frameSize < 1) {
      // bail out without emitting anything for this tag
      return videojs.log('Malformed ID3 frame encountered. Skipping metadata parsing.');
    }

    frame = {
      id: String.fromCharCode(tag.data[frameStart],
                              tag.data[frameStart + 1],
                              tag.data[frameStart + 2],
                              tag.data[frameStart + 3]),
      data: tag.data.subarray(frameStart + 10, frameStart + frameSize + 10)
    };
    // frame types without a dedicated parser are still reported, with
    // only their id and raw data
    if (tagParsers[frame.id]) {
      tagParsers[frame.id](frame);
    }
    tag.frames.push(frame);

    frameStart += 10; // advance past the frame header
    frameStart += frameSize; // advance past the frame body
  } while (frameStart < tagSize);
  this.trigger('data', tag);
};
};
MetadataStream.prototype = new videojs.Hls.Stream();
......
......@@ -368,12 +368,6 @@
aacStream.setNextTimeStamp(pts,
pesPacketSize,
dataAlignmentIndicator);
} else {
self.metadataStream.push({
pts: pts,
dts: dts,
data: data.subarray(offset)
});
}
}
......@@ -381,6 +375,12 @@
aacStream.writeBytes(data, offset, end - offset);
} else if (pid === self.stream.programMapTable[STREAM_TYPES.h264]) {
h264Stream.writeBytes(data, offset, end - offset);
} else if (pid === self.stream.programMapTable[STREAM_TYPES.metadata]) {
self.metadataStream.push({
pts: pts,
dts: dts,
data: data.subarray(offset)
});
}
} else if (self.stream.pmtPid === pid) {
// similarly to the PAT, jump to the first byte of the section
......
......@@ -99,7 +99,7 @@ videojs.Hls.prototype.src = function(src) {
}
metadataStream.on('data', function(metadata) {
var i, frame, time, hexDigit;
var i, cue, frame, time, hexDigit;
// create the metadata track if this is the first ID3 tag we've
// seen
......@@ -118,7 +118,9 @@ videojs.Hls.prototype.src = function(src) {
for (i = 0; i < metadata.frames.length; i++) {
frame = metadata.frames[i];
time = metadata.pts / 1000;
textTrack.addCue(new window.VTTCue(time, time, frame.value || frame.url));
cue = new window.VTTCue(time, time, frame.value || frame.url || '');
cue.frame = frame;
textTrack.addCue(cue);
}
});
})();
......
......@@ -60,11 +60,13 @@
], frames),
size;
// size is stored as a sequence of four 7-bit integers with the
// high bit of each byte set to zero
size = result.length - 10;
result[6] = (size >>> 24) & 0xff;
result[7] = (size >>> 16) & 0xff;
result[8] = (size >>> 8) & 0xff;
result[9] = (size) & 0xff;
result[6] = (size >>> 21) & 0x7f;
result[7] = (size >>> 14) & 0x7f;
result[8] = (size >>> 7) & 0x7f;
result[9] = (size) & 0x7f;
return result;
};
......@@ -206,7 +208,7 @@
equal(events[0].dts, 100, 'translated dts');
});
test('parses TXXX tags', function() {
test('parses TXXX frames', function() {
var events = [];
metadataStream.on('data', function(event) {
events.push(event);
......@@ -227,11 +229,12 @@
equal(events.length, 1, 'parsed one tag');
equal(events[0].frames.length, 1, 'parsed one frame');
equal(events[0].frames[0].id, 'TXXX', 'parsed the frame id');
equal(events[0].frames[0].description, 'get done', 'parsed the description');
equal(events[0].frames[0].value, '{ "key": "value" }', 'parsed the value');
});
test('parses WXXX tags', function() {
test('parses WXXX frames', function() {
var events = [], url = 'http://example.com/path/file?abc=7&d=4#ty';
metadataStream.on('data', function(event) {
events.push(event);
......@@ -252,11 +255,12 @@
equal(events.length, 1, 'parsed one tag');
equal(events[0].frames.length, 1, 'parsed one frame');
equal(events[0].frames[0].id, 'WXXX', 'parsed the frame id');
equal(events[0].frames[0].description, '', 'parsed the description');
equal(events[0].frames[0].url, url, 'parsed the value');
});
test('parses TXXX tags with characters that have a single-digit hexadecimal representation', function() {
test('parses TXXX frames with characters that have a single-digit hexadecimal representation', function() {
var events = [], value = String.fromCharCode(7);
metadataStream.on('data', function(event) {
events.push(event);
......@@ -280,6 +284,138 @@
'parsed the single-digit character');
});
// A PRIV frame built from an owner C-string and a payload should come
// back with the owner decoded and the payload bytes untouched.
test('parses PRIV frames', function() {
  var
    parsedTags = [],
    privateBytes = stringToInts('arbitrary data may be included in the payload ' +
                                'of a PRIV frame'),
    privFrame;

  metadataStream.on('data', function(parsedTag) {
    parsedTags.push(parsedTag);
  });

  metadataStream.push({
    trackId: 7,
    pts: 1000,
    dts: 900,

    // a complete tag holding a single PRIV frame
    data: new Uint8Array(id3Tag(id3Frame('PRIV',
                                         stringToCString('priv-owner@example.com'),
                                         privateBytes)))
  });

  equal(parsedTags.length, 1, 'parsed a tag');
  equal(parsedTags[0].frames.length, 1, 'parsed a frame');

  privFrame = parsedTags[0].frames[0];
  equal(privFrame.id, 'PRIV', 'frame id is PRIV');
  equal(privFrame.owner, 'priv-owner@example.com', 'parsed the owner');
  deepEqual(new Uint8Array(privFrame.privateData),
            new Uint8Array(privateBytes),
            'parsed the frame private data');
});
// A tag delivered in two pushes must be held back until the second push
// completes it, and the stream must be able to do the same again for the
// next fragmented tag.
test('parses tags split across pushes', function() {
  var
    events = [],
    owner = stringToCString('owner@example.com'),
    payload = stringToInts('A TS packet is 188 bytes in length so that it can' +
                           ' be easily transmitted over ATM networks, an ' +
                           'important medium at one time. We want to be sure' +
                           ' that ID3 frames larger than a TS packet are ' +
                           'properly re-assembled.'),
    tag = new Uint8Array(id3Tag(id3Frame('PRIV', owner, payload))),
    front = tag.subarray(0, 100),
    back = tag.subarray(100);

  metadataStream.on('data', function(event) {
    events.push(event);
  });

  // the first piece alone is not enough to emit anything
  metadataStream.push({
    trackId: 7,
    pts: 1000,
    dts: 900,
    data: front
  });

  equal(events.length, 0, 'parsed zero tags');

  // the second piece completes the tag
  metadataStream.push({
    trackId: 7,
    pts: 1000,
    dts: 900,
    data: back
  });

  equal(events.length, 1, 'parsed a tag');
  equal(events[0].frames.length, 1, 'parsed a frame');
  equal(events[0].frames[0].data.byteLength,
        owner.length + payload.length,
        'collected data across pushes');

  // parses subsequent fragmented tags
  tag = new Uint8Array(id3Tag(id3Frame('PRIV',
                                       owner, payload, payload)));
  front = tag.subarray(0, 188);
  back = tag.subarray(188);
  metadataStream.push({
    trackId: 7,
    pts: 2000,
    dts: 2000,
    data: front
  });
  metadataStream.push({
    trackId: 7,
    pts: 2000,
    dts: 2000,
    data: back
  });
  equal(events.length, 2, 'parsed a subsequent frame');
});
// When the 10-byte ID3 header itself is split across two pushes, neither
// piece produces a tag; a complete tag pushed afterwards is still parsed.
test('ignores tags when the header is fragmented', function() {
  var
    parsedTags = [],
    // build the bytes of a tag holding one PRIV frame
    privTag = function(ownerText, payloadText) {
      return new Uint8Array(id3Tag(id3Frame('PRIV',
                                            stringToCString(ownerText),
                                            stringToInts(payloadText))));
    },
    // hand one packet to the stream under test
    pushPacket = function(pts, dts, bytes) {
      metadataStream.push({
        trackId: 7,
        pts: pts,
        dts: dts,
        data: bytes
      });
    },
    fragmented = privTag('owner@example.com', 'payload'),
    // split the 10-byte ID3 tag header in half
    headerStart = fragmented.subarray(0, 5),
    remainder = fragmented.subarray(5);

  metadataStream.on('data', function(parsedTag) {
    parsedTags.push(parsedTag);
  });

  pushPacket(1000, 900, headerStart);
  pushPacket(1000, 900, remainder);

  equal(parsedTags.length, 0, 'parsed zero tags');

  pushPacket(1500, 1500, privTag('owner2', 'payload2'));

  equal(parsedTags.length, 1, 'parsed one tag');
  equal(parsedTags[0].frames[0].owner, 'owner2', 'dropped the first tag');
});
// https://html.spec.whatwg.org/multipage/embedded-content.html#steps-to-expose-a-media-resource-specific-text-track
test('constructs the dispatch type', function() {
metadataStream = new videojs.Hls.MetadataStream({
......
......@@ -177,7 +177,8 @@
// sync_byte
result.push(0x47);
// transport_error_indicator payload_unit_start_indicator transport_priority PID
result.push((settings.pid & 0x1f) << 8 | 0x40);
result.push((settings.pid & 0x1f) << 8 |
(settings.payloadUnitStartIndicator ? 0x40 : 0x00));
result.push(settings.pid & 0xff);
// transport_scrambling_control adaptation_field_control continuity_counter
result.push(0x10);
......@@ -226,6 +227,29 @@
equal(parser.stream.programMapTable[0x15], 0x02, 'metadata is PID 2');
});
// A packet on the metadata PID must reach the metadata stream even when
// its payload_unit_start_indicator is not set.
test('recognizes subsequent metadata packets after the payload start', function() {
  var
    received = [],
    programsPacket,
    pidsPacket,
    metadataPacket;

  // capture whatever the parser forwards to the metadata stream
  parser.metadataStream.push = function(packet) {
    received.push(packet);
  };

  programsPacket = makePacket({
    programs: {
      0x01: [0x01]
    }
  });
  pidsPacket = makePacket({
    pid: 0x01,
    pids: {
      // Rec. ITU-T H.222.0 (06/2012), Table 2-34
      0x02: 0x15 // Metadata carried in PES packets
    }
  });
  metadataPacket = makePacket({
    pid: 0x02,
    payloadUnitStartIndicator: false
  });

  parser.parseSegmentBinaryData(
    new Uint8Array(programsPacket.concat(pidsPacket).concat(metadataPacket)));

  equal(received.length, 1, 'parsed non-payload metadata packet');
});
test('parses the first bipbop segment', function() {
parser.parseSegmentBinaryData(window.bcSegment);
......
......@@ -1112,11 +1112,15 @@ test('exposes in-band metadata events as cues', function() {
pts: 2000,
data: new Uint8Array([]),
frames: [{
type: 'TXXX',
id: 'TXXX',
value: 'cue text'
}, {
type: 'WXXX',
id: 'WXXX',
url: 'http://example.com'
}, {
id: 'PRIV',
owner: 'owner@example.com',
privateData: new Uint8Array([1, 2, 3])
}]
});
};
......@@ -1128,7 +1132,7 @@ test('exposes in-band metadata events as cues', function() {
track = player.textTracks()[0];
equal(track.kind, 'metadata', 'kind is metadata');
equal(track.inBandMetadataTrackDispatchType, '15010203BB', 'set the dispatch type');
equal(track.cues.length, 2, 'created two cues');
equal(track.cues.length, 3, 'created three cues');
equal(track.cues[0].startTime, 2, 'cue starts at 2 seconds');
equal(track.cues[0].endTime, 2, 'cue ends at 2 seconds');
equal(track.cues[0].pauseOnExit, false, 'cue does not pause on exit');
......@@ -1138,6 +1142,15 @@ test('exposes in-band metadata events as cues', function() {
equal(track.cues[1].endTime, 2, 'cue ends at 2 seconds');
equal(track.cues[1].pauseOnExit, false, 'cue does not pause on exit');
equal(track.cues[1].text, 'http://example.com', 'set cue text');
equal(track.cues[2].startTime, 2, 'cue starts at 2 seconds');
equal(track.cues[2].endTime, 2, 'cue ends at 2 seconds');
equal(track.cues[2].pauseOnExit, false, 'cue does not pause on exit');
equal(track.cues[2].text, '', 'did not set cue text');
equal(track.cues[2].frame.owner, 'owner@example.com', 'set the owner');
deepEqual(track.cues[2].frame.privateData,
new Uint8Array([1, 2, 3]),
'set the private data');
});
test('drops tags before the target timestamp when seeking', function() {
......