b56951a6 by David LaPalomento

Optimize decryption

For #218. Use TypedArrays instead of DataViews because DataViews weren't getting optimized very well. Use Int32Arrays instead of Uint32Arrays to avoid V8 deoptimization at runtime when negative values are encountered. Swap byte order in the main loop instead of beforehand. These changes don't get decrypt running in less than 16ms on my machine, but they do speed things up by an order of magnitude or so. Additional work to make the decryption asynchronous appears necessary.
1 parent 1670a237
......@@ -151,49 +151,51 @@ AES.prototype = {
},
/**
* Decrypt an array of 4 big-endian words.
* @param {Array} data The ciphertext.
* Decrypt 16 bytes, specified as four 32-bit words.
* @param encrypted0 {number} the first word to decrypt
* @param encrypted1 {number} the second word to decrypt
* @param encrypted2 {number} the third word to decrypt
* @param encrypted3 {number} the fourth word to decrypt
* @param out {Int32Array} the array to write the decrypted words
* into
* @param offset {number} the offset into the output array to start
* writing results
* @return {Array} The plaintext.
*/
decrypt:function (input) {
if (input.length !== 4) {
throw new Error("Invalid aes block size");
}
decrypt:function (encrypted0, encrypted1, encrypted2, encrypted3, out, offset) {
var key = this._key[1],
// state variables a,b,c,d are loaded with pre-whitened data
a = input[0] ^ key[0],
b = input[3] ^ key[1],
c = input[2] ^ key[2],
d = input[1] ^ key[3],
a = encrypted0 ^ key[0],
b = encrypted3 ^ key[1],
c = encrypted2 ^ key[2],
d = encrypted1 ^ key[3],
a2, b2, c2,
nInnerRounds = key.length/4 - 2,
nInnerRounds = key.length / 4 - 2, // key.length === 2 ?
i,
kIndex = 4,
out = [0,0,0,0],
table = this._tables[1],
// load up the tables
t0 = table[0],
t1 = table[1],
t2 = table[2],
t3 = table[3],
table0 = table[0],
table1 = table[1],
table2 = table[2],
table3 = table[3],
sbox = table[4];
// Inner rounds. Cribbed from OpenSSL.
for (i = 0; i < nInnerRounds; i++) {
a2 = t0[a>>>24] ^ t1[b>>16 & 255] ^ t2[c>>8 & 255] ^ t3[d & 255] ^ key[kIndex];
b2 = t0[b>>>24] ^ t1[c>>16 & 255] ^ t2[d>>8 & 255] ^ t3[a & 255] ^ key[kIndex + 1];
c2 = t0[c>>>24] ^ t1[d>>16 & 255] ^ t2[a>>8 & 255] ^ t3[b & 255] ^ key[kIndex + 2];
d = t0[d>>>24] ^ t1[a>>16 & 255] ^ t2[b>>8 & 255] ^ t3[c & 255] ^ key[kIndex + 3];
a2 = table0[a>>>24] ^ table1[b>>16 & 255] ^ table2[c>>8 & 255] ^ table3[d & 255] ^ key[kIndex];
b2 = table0[b>>>24] ^ table1[c>>16 & 255] ^ table2[d>>8 & 255] ^ table3[a & 255] ^ key[kIndex + 1];
c2 = table0[c>>>24] ^ table1[d>>16 & 255] ^ table2[a>>8 & 255] ^ table3[b & 255] ^ key[kIndex + 2];
d = table0[d>>>24] ^ table1[a>>16 & 255] ^ table2[b>>8 & 255] ^ table3[c & 255] ^ key[kIndex + 3];
kIndex += 4;
a=a2; b=b2; c=c2;
}
// Last round.
for (i = 0; i < 4; i++) {
out[3 & -i] =
out[(3 & -i) + offset] =
sbox[a>>>24 ]<<24 ^
sbox[b>>16 & 255]<<16 ^
sbox[c>>8 & 255]<<8 ^
......@@ -201,42 +203,102 @@ AES.prototype = {
key[kIndex++];
a2=a; a=b; b=c; c=d; d=a2;
}
return out;
}
};
/**
* Decrypt bytes using AES-128 with CBC and PKCS#7 padding.
* @param encrypted {Uint8Array} the encrypted bytes
* @param key {Uint32Array} the bytes of the decryption key
* @param initVector {Uint32Array} the initialization vector (IV) to
* use for the first round of CBC.
* @return {Uint8Array} the decrypted bytes
*
* @see http://en.wikipedia.org/wiki/Advanced_Encryption_Standard
* @see http://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Block_Chaining_.28CBC.29
* @see https://tools.ietf.org/html/rfc2315
*/
decrypt = function(encrypted, key, initVector) {
var
encryptedView = new DataView(encrypted.buffer),
platformEndian = new Uint32Array(encrypted.byteLength / 4),
// word-level access to the encrypted bytes
encrypted32 = new Int32Array(encrypted.buffer),
decipher = new AES(Array.prototype.slice.call(key)),
// byte and word-level access for the decrypted output
decrypted = new Uint8Array(encrypted.byteLength),
decryptedView = new DataView(decrypted.buffer),
decryptedBlock,
word,
byte;
// convert big-endian input to platform byte order for decryption
for (byte = 0; byte < encrypted.byteLength; byte += 4) {
platformEndian[byte >>> 2] = encryptedView.getUint32(byte);
}
decrypted32 = new Int32Array(decrypted.buffer),
// temporary variables for working with the IV, encrypted, and
// decrypted data
init0, init1, init2, init3,
encrypted0, encrypted1, encrypted2, encrypted3,
decrypted0, decrypted1, decrypted2, decrypted3,
// iteration variable
wordIx;
// pull out the words of the IV so that we don't modify the
// passed-in reference and so that each word is easier to access
init0 = initVector[0];
init1 = initVector[1];
init2 = initVector[2];
init3 = initVector[3];
// decrypt four word sequences, applying cipher-block chaining (CBC)
// to each decrypted block
for (word = 0; word < platformEndian.length; word += 4) {
for (wordIx = 0; wordIx < encrypted32.length; wordIx += 4) {
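// the Int32Array view reads each word in platform byte order, so
// swap the bytes of the big-endian (network order) input to get the
// intended word values (this assumes a little-endian platform)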
encrypted0 = (encrypted32[wordIx] << 24) |
((encrypted32[wordIx] & 0xff00) << 8) |
((encrypted32[wordIx] & 0xff0000) >> 8) |
(encrypted32[wordIx] >>> 24);
encrypted1 = (encrypted32[wordIx + 1] << 24) |
((encrypted32[wordIx + 1] & 0xff00) << 8) |
((encrypted32[wordIx + 1] & 0xff0000) >> 8) |
(encrypted32[wordIx + 1] >>> 24);
encrypted2 = (encrypted32[wordIx + 2] << 24) |
((encrypted32[wordIx + 2] & 0xff00) << 8) |
((encrypted32[wordIx + 2] & 0xff0000) >> 8) |
(encrypted32[wordIx + 2] >>> 24);
encrypted3 = (encrypted32[wordIx + 3] << 24) |
((encrypted32[wordIx + 3] & 0xff00) << 8) |
((encrypted32[wordIx + 3] & 0xff0000) >> 8) |
(encrypted32[wordIx + 3] >>> 24);
// decrypt the block
decryptedBlock = decipher.decrypt(platformEndian.subarray(word, word + 4));
decipher.decrypt(encrypted0, encrypted1, encrypted2, encrypted3, decrypted32, wordIx);
// XOR with the IV, and restore network byte-order to obtain the
// plaintext
byte = word << 2;
decryptedView.setUint32(byte, decryptedBlock[0] ^ initVector[0]);
decryptedView.setUint32(byte + 4, decryptedBlock[1] ^ initVector[1]);
decryptedView.setUint32(byte + 8, decryptedBlock[2] ^ initVector[2]);
decryptedView.setUint32(byte + 12, decryptedBlock[3] ^ initVector[3]);
decrypted0 = decrypted32[wordIx] ^ init0;
decrypted1 = decrypted32[wordIx + 1] ^ init1;
decrypted2 = decrypted32[wordIx + 2] ^ init2;
decrypted3 = decrypted32[wordIx + 3] ^ init3;
decrypted32[wordIx] = decrypted0 << 24 |
((decrypted0 & 0xff00) << 8) |
((decrypted0 & 0xff0000) >> 8) |
(decrypted0 >>> 24);
decrypted32[wordIx + 1] = decrypted1 << 24 |
((decrypted1 & 0xff00) << 8) |
((decrypted1 & 0xff0000) >> 8) |
(decrypted1 >>> 24);
decrypted32[wordIx + 2] = decrypted2 << 24 |
((decrypted2 & 0xff00) << 8) |
((decrypted2 & 0xff0000) >> 8) |
(decrypted2 >>> 24);
decrypted32[wordIx + 3] = decrypted3 << 24 |
((decrypted3 & 0xff00) << 8) |
((decrypted3 & 0xff0000) >> 8) |
(decrypted3 >>> 24);
// setup the IV for the next round
initVector = platformEndian.subarray(word, word + 4);
init0 = encrypted0;
init1 = encrypted1;
init2 = encrypted2;
init3 = encrypted3;
}
// remove any padding
......