e37356d8 by David LaPalomento

Merge pull request #251 from videojs/hlse-perf

Optimize decryption
2 parents 77591534 48f0f290
......@@ -38,7 +38,18 @@
(function(window, videojs, unpad) {
'use strict';
var AES, decrypt;
var AES, AsyncStream, Decrypter, decrypt, ntoh;
/**
* Convert network-order (big-endian) bytes into their little-endian
* representation.
*/
ntoh = function(word) {
return (word << 24) |
((word & 0xff00) << 8) |
((word & 0xff0000) >> 8) |
(word >>> 24);
};
/**
* Schedule out an AES key for both encryption and decryption. This
......@@ -49,38 +60,38 @@ var AES, decrypt;
*/
AES = function (key) {
this._precompute();
var i, j, tmp,
encKey, decKey,
sbox = this._tables[0][4], decTable = this._tables[1],
keyLen = key.length, rcon = 1;
if (keyLen !== 4 && keyLen !== 6 && keyLen !== 8) {
throw new Error("Invalid aes key size");
}
encKey = key.slice(0);
decKey = [];
this._key = [encKey, decKey];
// schedule encryption keys
for (i = keyLen; i < 4 * keyLen + 28; i++) {
tmp = encKey[i-1];
// apply sbox
if (i%keyLen === 0 || (keyLen === 8 && i%keyLen === 4)) {
tmp = sbox[tmp>>>24]<<24 ^ sbox[tmp>>16&255]<<16 ^ sbox[tmp>>8&255]<<8 ^ sbox[tmp&255];
// shift rows and add rcon
if (i%keyLen === 0) {
tmp = tmp<<8 ^ tmp>>>24 ^ rcon<<24;
rcon = rcon<<1 ^ (rcon>>7)*283;
}
}
encKey[i] = encKey[i-keyLen] ^ tmp;
}
// schedule decryption keys
for (j = 0; i; j++, i--) {
tmp = encKey[j&3 ? i : i - 4];
......@@ -124,126 +135,231 @@ AES.prototype = {
for (i = 0; i < 256; i++) {
th[( d[i] = i<<1 ^ (i>>7)*283 )^i]=i;
}
for (x = xInv = 0; !sbox[x]; x ^= x2 || 1, xInv = th[xInv] || 1) {
// Compute sbox
s = xInv ^ xInv<<1 ^ xInv<<2 ^ xInv<<3 ^ xInv<<4;
s = s>>8 ^ s&255 ^ 99;
sbox[x] = s;
sboxInv[s] = x;
// Compute MixColumns
x8 = d[x4 = d[x2 = d[x]]];
tDec = x8*0x1010101 ^ x4*0x10001 ^ x2*0x101 ^ x*0x1010100;
tEnc = d[s]*0x101 ^ s*0x1010100;
for (i = 0; i < 4; i++) {
encTable[i][x] = tEnc = tEnc<<24 ^ tEnc>>>8;
decTable[i][s] = tDec = tDec<<24 ^ tDec>>>8;
}
}
// Compactify. Considerable speedup on Firefox.
for (i = 0; i < 5; i++) {
encTable[i] = encTable[i].slice(0);
decTable[i] = decTable[i].slice(0);
}
},
/**
* Decrypt an array of 4 big-endian words.
* @param {Array} data The ciphertext.
* Decrypt 16 bytes, specified as four 32-bit words.
* @param encrypted0 {number} the first word to decrypt
* @param encrypted1 {number} the second word to decrypt
* @param encrypted2 {number} the third word to decrypt
* @param encrypted3 {number} the fourth word to decrypt
* @param out {Int32Array} the array to write the decrypted words
* into
* @param offset {number} the offset into the output array to start
* writing results
* @return {Array} The plaintext.
*/
decrypt:function (input) {
if (input.length !== 4) {
throw new Error("Invalid aes block size");
}
decrypt:function (encrypted0, encrypted1, encrypted2, encrypted3, out, offset) {
var key = this._key[1],
// state variables a,b,c,d are loaded with pre-whitened data
a = input[0] ^ key[0],
b = input[3] ^ key[1],
c = input[2] ^ key[2],
d = input[1] ^ key[3],
a = encrypted0 ^ key[0],
b = encrypted3 ^ key[1],
c = encrypted2 ^ key[2],
d = encrypted1 ^ key[3],
a2, b2, c2,
nInnerRounds = key.length/4 - 2,
nInnerRounds = key.length / 4 - 2, // key.length === 2 ?
i,
kIndex = 4,
out = [0,0,0,0],
table = this._tables[1],
// load up the tables
t0 = table[0],
t1 = table[1],
t2 = table[2],
t3 = table[3],
table0 = table[0],
table1 = table[1],
table2 = table[2],
table3 = table[3],
sbox = table[4];
// Inner rounds. Cribbed from OpenSSL.
for (i = 0; i < nInnerRounds; i++) {
a2 = t0[a>>>24] ^ t1[b>>16 & 255] ^ t2[c>>8 & 255] ^ t3[d & 255] ^ key[kIndex];
b2 = t0[b>>>24] ^ t1[c>>16 & 255] ^ t2[d>>8 & 255] ^ t3[a & 255] ^ key[kIndex + 1];
c2 = t0[c>>>24] ^ t1[d>>16 & 255] ^ t2[a>>8 & 255] ^ t3[b & 255] ^ key[kIndex + 2];
d = t0[d>>>24] ^ t1[a>>16 & 255] ^ t2[b>>8 & 255] ^ t3[c & 255] ^ key[kIndex + 3];
a2 = table0[a>>>24] ^ table1[b>>16 & 255] ^ table2[c>>8 & 255] ^ table3[d & 255] ^ key[kIndex];
b2 = table0[b>>>24] ^ table1[c>>16 & 255] ^ table2[d>>8 & 255] ^ table3[a & 255] ^ key[kIndex + 1];
c2 = table0[c>>>24] ^ table1[d>>16 & 255] ^ table2[a>>8 & 255] ^ table3[b & 255] ^ key[kIndex + 2];
d = table0[d>>>24] ^ table1[a>>16 & 255] ^ table2[b>>8 & 255] ^ table3[c & 255] ^ key[kIndex + 3];
kIndex += 4;
a=a2; b=b2; c=c2;
}
// Last round.
for (i = 0; i < 4; i++) {
out[3 & -i] =
sbox[a>>>24 ]<<24 ^
out[(3 & -i) + offset] =
sbox[a>>>24 ]<<24 ^
sbox[b>>16 & 255]<<16 ^
sbox[c>>8 & 255]<<8 ^
sbox[d & 255] ^
key[kIndex++];
a2=a; a=b; b=c; c=d; d=a2;
}
return out;
}
};
/**
* Decrypt bytes using AES-128 with CBC and PKCS#7 padding.
* @param encrypted {Uint8Array} the encrypted bytes
* @param key {Uint32Array} the bytes of the decryption key
* @param initVector {Uint32Array} the initialization vector (IV) to
* use for the first round of CBC.
* @return {Uint8Array} the decrypted bytes
*
* @see http://en.wikipedia.org/wiki/Advanced_Encryption_Standard
* @see http://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Block_Chaining_.28CBC.29
* @see https://tools.ietf.org/html/rfc2315
*/
decrypt = function(encrypted, key, initVector) {
var
encryptedView = new DataView(encrypted.buffer),
platformEndian = new Uint32Array(encrypted.byteLength / 4),
// word-level access to the encrypted bytes
encrypted32 = new Int32Array(encrypted.buffer, encrypted.byteOffset, encrypted.byteLength >> 2),
decipher = new AES(Array.prototype.slice.call(key)),
// byte and word-level access for the decrypted output
decrypted = new Uint8Array(encrypted.byteLength),
decryptedView = new DataView(decrypted.buffer),
decryptedBlock,
word,
byte;
// convert big-endian input to platform byte order for decryption
for (byte = 0; byte < encrypted.byteLength; byte += 4) {
platformEndian[byte >>> 2] = encryptedView.getUint32(byte);
}
decrypted32 = new Int32Array(decrypted.buffer),
// temporary variables for working with the IV, encrypted, and
// decrypted data
init0, init1, init2, init3,
encrypted0, encrypted1, encrypted2, encrypted3,
// iteration variable
wordIx;
// pull out the words of the IV to ensure we don't modify the
// passed-in reference and easier access
init0 = initVector[0];
init1 = initVector[1];
init2 = initVector[2];
init3 = initVector[3];
// decrypt four word sequences, applying cipher-block chaining (CBC)
// to each decrypted block
for (word = 0; word < platformEndian.length; word += 4) {
for (wordIx = 0; wordIx < encrypted32.length; wordIx += 4) {
// convert big-endian (network order) words into little-endian
// (javascript order)
encrypted0 = ntoh(encrypted32[wordIx]);
encrypted1 = ntoh(encrypted32[wordIx + 1]);
encrypted2 = ntoh(encrypted32[wordIx + 2]);
encrypted3 = ntoh(encrypted32[wordIx + 3]);
// decrypt the block
decryptedBlock = decipher.decrypt(platformEndian.subarray(word, word + 4));
decipher.decrypt(encrypted0,
encrypted1,
encrypted2,
encrypted3,
decrypted32,
wordIx);
// XOR with the IV, and restore network byte-order to obtain the
// plaintext
byte = word << 2;
decryptedView.setUint32(byte, decryptedBlock[0] ^ initVector[0]);
decryptedView.setUint32(byte + 4, decryptedBlock[1] ^ initVector[1]);
decryptedView.setUint32(byte + 8, decryptedBlock[2] ^ initVector[2]);
decryptedView.setUint32(byte + 12, decryptedBlock[3] ^ initVector[3]);
decrypted32[wordIx] = ntoh(decrypted32[wordIx] ^ init0);
decrypted32[wordIx + 1] = ntoh(decrypted32[wordIx + 1] ^ init1);
decrypted32[wordIx + 2] = ntoh(decrypted32[wordIx + 2] ^ init2);
decrypted32[wordIx + 3] = ntoh(decrypted32[wordIx + 3] ^ init3);
// setup the IV for the next round
initVector = platformEndian.subarray(word, word + 4);
init0 = encrypted0;
init1 = encrypted1;
init2 = encrypted2;
init3 = encrypted3;
}
// remove any padding
return unpad(decrypted);
return decrypted;
};
AsyncStream = function() {
this.jobs = [];
this.delay = 1;
this.timeout_ = null;
};
AsyncStream.prototype = new videojs.Hls.Stream();
AsyncStream.prototype.processJob_ = function() {
this.jobs.shift()();
if (this.jobs.length) {
this.timeout_ = setTimeout(videojs.bind(this, this.processJob_),
this.delay);
} else {
this.timeout_ = null;
}
};
AsyncStream.prototype.push = function(job) {
this.jobs.push(job);
if (!this.timeout_) {
this.timeout_ = setTimeout(videojs.bind(this, this.processJob_),
this.delay);
}
};
Decrypter = function(encrypted, key, initVector, done) {
var
step = Decrypter.STEP,
encrypted32 = new Int32Array(encrypted.buffer),
decrypted = new Uint8Array(encrypted.byteLength),
i = 0;
this.asyncStream_ = new AsyncStream();
// split up the encryption job and do the individual chunks asynchronously
this.asyncStream_.push(this.decryptChunk_(encrypted32.subarray(i, i + step),
key,
initVector,
decrypted,
i));
for (i = step; i < encrypted32.length; i += step) {
initVector = new Uint32Array([
ntoh(encrypted32[i - 4]),
ntoh(encrypted32[i - 3]),
ntoh(encrypted32[i - 2]),
ntoh(encrypted32[i - 1])
]);
this.asyncStream_.push(this.decryptChunk_(encrypted32.subarray(i, i + step),
key,
initVector,
decrypted));
}
// invoke the done() callback when everything is finished
this.asyncStream_.push(function() {
// remove pkcs#7 padding from the decrypted bytes
done(null, unpad(decrypted));
});
};
Decrypter.prototype = new videojs.Hls.Stream();
Decrypter.prototype.decryptChunk_ = function(encrypted, key, initVector, decrypted) {
return function() {
var bytes = decrypt(encrypted,
key,
initVector);
decrypted.set(bytes, encrypted.byteOffset);
};
};
// the maximum number of bytes to process at one time
Decrypter.STEP = 4 * 8000;
// exports
videojs.Hls.decrypt = decrypt;
videojs.Hls.Decrypter = Decrypter;
videojs.Hls.AsyncStream = AsyncStream;
})(window, window.videojs, window.pkcs7.unpad);
......
......@@ -637,6 +637,8 @@ videojs.Hls.prototype.loadSegment = function(segmentUri, offset) {
responseType: 'arraybuffer',
withCredentials: settings.withCredentials
}, function(error, url) {
var segmentInfo;
// the segment request is no longer outstanding
tech.segmentXhr_ = null;
......@@ -669,12 +671,26 @@ videojs.Hls.prototype.loadSegment = function(segmentUri, offset) {
// if the segment is the start of a timestamp discontinuity,
// we have to wait until the sourcebuffer is empty before
// aborting the source buffer processing
tech.segmentBuffer_.push({
segmentInfo = {
// the segment's mediaIndex at the time it was received
mediaIndex: tech.mediaIndex,
// the segment's playlist
playlist: tech.playlists.media(),
// optionally, a time offset to seek to within the segment
offset: offset,
bytes: new Uint8Array(this.response)
});
// unencrypted bytes of the segment
bytes: null,
// when a key is defined for this segment, the encrypted bytes
encryptedBytes: null,
// optionally, the decrypter that is unencrypting the segment
decrypter: null
};
if (segmentInfo.playlist.segments[segmentInfo.mediaIndex].key) {
segmentInfo.encryptedBytes = new Uint8Array(this.response);
} else {
segmentInfo.bytes = new Uint8Array(this.response);
}
tech.segmentBuffer_.push(segmentInfo);
player.trigger('progress');
tech.drainBuffer();
......@@ -689,12 +705,15 @@ videojs.Hls.prototype.loadSegment = function(segmentUri, offset) {
videojs.Hls.prototype.drainBuffer = function(event) {
var
i = 0,
segmentInfo,
mediaIndex,
playlist,
offset,
tags,
bytes,
segment,
decrypter,
segIv,
ptsTime,
segmentOffset,
......@@ -704,28 +723,45 @@ videojs.Hls.prototype.drainBuffer = function(event) {
return;
}
mediaIndex = segmentBuffer[0].mediaIndex;
playlist = segmentBuffer[0].playlist;
offset = segmentBuffer[0].offset;
bytes = segmentBuffer[0].bytes;
segmentInfo = segmentBuffer[0];
mediaIndex = segmentInfo.mediaIndex;
playlist = segmentInfo.playlist;
offset = segmentInfo.offset;
bytes = segmentInfo.bytes;
segment = playlist.segments[mediaIndex];
if (segment.key) {
if (segment.key && !bytes) {
// this is an encrypted segment
// if the key download failed, we want to skip this segment
// but if the key hasn't downloaded yet, we want to try again later
if (keyFailed(segment.key)) {
return segmentBuffer.shift();
} else if (!segment.key.bytes) {
// trigger a key request if one is not already in-flight
return this.fetchKeys_();
} else if (segmentInfo.decrypter) {
// decryption is in progress, try again later
return;
} else {
// if the media sequence is greater than 2^32, the IV will be incorrect
// assuming 10s segments, that would be about 1300 years
var segIv = segment.key.iv || new Uint32Array([0, 0, 0, mediaIndex + playlist.mediaSequence]);
bytes = videojs.Hls.decrypt(bytes,
segment.key.bytes,
segIv);
segIv = segment.key.iv || new Uint32Array([0, 0, 0, mediaIndex + playlist.mediaSequence]);
// create a decrypter to incrementally decrypt the segment
decrypter = new videojs.Hls.Decrypter(segmentInfo.encryptedBytes,
segment.key.bytes,
segIv,
function(err, bytes) {
segmentInfo.bytes = bytes;
});
segmentInfo.decrypter = decrypter;
return;
}
}
......
(function(window, videojs, undefined) {
(function(window, videojs, unpad, undefined) {
'use strict';
/*
======== A Handy Little QUnit Reference ========
......@@ -46,7 +46,7 @@ test('decrypts a single AES-128 with PKCS7 block', function() {
0x82, 0xa8, 0xf0, 0x67]);
deepEqual('howdy folks',
stringFromBytes(videojs.Hls.decrypt(encrypted, key, initVector)),
stringFromBytes(unpad(videojs.Hls.decrypt(encrypted, key, initVector))),
'decrypted with a byte array key');
});
......@@ -68,8 +68,100 @@ test('decrypts multiple AES-128 blocks with CBC', function() {
]);
deepEqual('0123456789abcdef01234',
stringFromBytes(videojs.Hls.decrypt(encrypted, key, initVector)),
stringFromBytes(unpad(videojs.Hls.decrypt(encrypted, key, initVector))),
'decrypted multiple blocks');
});
})(window, window.videojs);
var clock;
module('Incremental Processing', {
setup: function() {
clock = sinon.useFakeTimers();
},
teardown: function() {
clock.restore();
}
});
test('executes a callback after a timeout', function() {
var asyncStream = new videojs.Hls.AsyncStream(),
calls = '';
asyncStream.push(function() {
calls += 'a';
});
clock.tick(asyncStream.delay);
equal(calls, 'a', 'invoked the callback once');
clock.tick(asyncStream.delay);
equal(calls, 'a', 'only invoked the callback once');
});
test('executes callback in series', function() {
var asyncStream = new videojs.Hls.AsyncStream(),
calls = '';
asyncStream.push(function() {
calls += 'a';
});
asyncStream.push(function() {
calls += 'b';
});
clock.tick(asyncStream.delay);
equal(calls, 'a', 'invoked the first callback');
clock.tick(asyncStream.delay);
equal(calls, 'ab', 'invoked the second');
});
var decrypter;
module('Incremental Decryption', {
setup: function() {
clock = sinon.useFakeTimers();
},
teardown: function() {
clock.restore();
}
});
test('asynchronously decrypts a 4-word block', function() {
var
key = new Uint32Array([0, 0, 0, 0]),
initVector = key,
// the string "howdy folks" encrypted
encrypted = new Uint8Array([
0xce, 0x90, 0x97, 0xd0,
0x08, 0x46, 0x4d, 0x18,
0x4f, 0xae, 0x01, 0x1c,
0x82, 0xa8, 0xf0, 0x67]),
decrypted;
decrypter = new videojs.Hls.Decrypter(encrypted, key, initVector, function(error, result) {
decrypted = result;
});
ok(!decrypted, 'asynchronously decrypts');
clock.tick(decrypter.asyncStream_.delay * 2);
ok(decrypted, 'completed decryption');
deepEqual('howdy folks',
stringFromBytes(decrypted),
'decrypts and unpads the result');
});
test('breaks up input greater than the step value', function() {
var encrypted = new Int32Array(videojs.Hls.Decrypter.STEP + 4),
done = false,
decrypter = new videojs.Hls.Decrypter(encrypted,
new Uint32Array(4),
new Uint32Array(4),
function() {
done = true;
});
clock.tick(decrypter.asyncStream_.delay * 2);
ok(!done, 'not finished after two ticks');
clock.tick(decrypter.asyncStream_.delay);
ok(done, 'finished after the last chunk is decrypted');
});
})(window, window.videojs, window.pkcs7.unpad);
......
......@@ -152,10 +152,8 @@ module('HLS', {
oldNativeHlsSupport = videojs.Hls.supportsNativeHls;
oldDecrypt = videojs.Hls.decrypt;
videojs.Hls.decrypt = function() {
return new Uint8Array([0]);
};
oldDecrypt = videojs.Hls.Decrypter;
videojs.Hls.Decrypter = function() {};
// fake XHRs
xhr = sinon.useFakeXMLHttpRequest();
......@@ -173,7 +171,7 @@ module('HLS', {
videojs.MediaSource.open = oldMediaSourceOpen;
videojs.Hls.SegmentParser = oldSegmentParser;
videojs.Hls.supportsNativeHls = oldNativeHlsSupport;
videojs.Hls.decrypt = oldDecrypt;
videojs.Hls.Decrypter = oldDecrypt;
videojs.SourceBuffer = oldSourceBuffer;
window.setTimeout = oldSetTimeout;
window.clearTimeout = oldClearTimeout;
......@@ -1904,6 +1902,10 @@ test('a new key XHR is created when a the segment is received', function() {
'#EXT-X-ENDLIST\n');
standardXHRResponse(requests.shift()); // segment 1
standardXHRResponse(requests.shift()); // key 1
// "finish" decrypting segment 1
player.hls.segmentBuffer_[0].bytes = new Uint8Array(16);
player.hls.checkBuffer_();
standardXHRResponse(requests.shift()); // segment 2
strictEqual(requests.length, 1, 'a key XHR is created');
......@@ -2009,6 +2011,9 @@ test('skip segments if key requests fail more than once', function() {
// key for second segment
requests[0].response = new Uint32Array([0,0,0,0]).buffer;
requests.shift().respond(200, null, '');
// "finish" decryption
player.hls.segmentBuffer_[0].bytes = new Uint8Array(16);
player.hls.checkBuffer_();
equal(bytes.length, 2, 'bytes from the second ts segment should be added');
equal(bytes[1], 1, 'the bytes from the second segment are added and not the first');
......@@ -2033,9 +2038,8 @@ test('the key is supplied to the decrypter in the correct format', function() {
'http://media.example.com/fileSequence52-B.ts\n');
videojs.Hls.decrypt = function(bytes, key) {
videojs.Hls.Decrypter = function(encrypted, key) {
keys.push(key);
return new Uint8Array([0]);
};
standardXHRResponse(requests.shift()); // segment
......@@ -2043,7 +2047,7 @@ test('the key is supplied to the decrypter in the correct format', function() {
requests[0].respond(200, null, '');
requests.shift(); // key
equal(keys.length, 1, 'only one call to decrypt was made');
equal(keys.length, 1, 'only one Decrypter was constructed');
deepEqual(keys[0],
new Uint32Array([0, 0x01000000, 0x02000000, 0x03000000]),
'passed the specified segment key');
......@@ -2068,9 +2072,8 @@ test('supplies the media sequence of current segment as the IV by default, if no
'http://media.example.com/fileSequence52-B.ts\n');
videojs.Hls.decrypt = function(bytes, key, iv) {
videojs.Hls.Decrypter = function(encrypted, key, iv) {
ivs.push(iv);
return new Uint8Array([0]);
};
requests[0].response = new Uint32Array([0,0,0,0]).buffer;
......@@ -2078,7 +2081,7 @@ test('supplies the media sequence of current segment as the IV by default, if no
requests.shift();
standardXHRResponse(requests.pop());
equal(ivs.length, 1, 'only one call to decrypt was made');
equal(ivs.length, 1, 'only one Decrypter was constructed');
deepEqual(ivs[0],
new Uint32Array([0, 0, 0, 5]),
'the IV for the segment is the media sequence');
......