b56951a6 by David LaPalomento

Optimize decryption

For #218. Use TypedArrays instead of DataViews because DataViews weren't being optimized very well. Use Int32Arrays instead of Uint32Arrays to avoid v8 deoptimization at runtime when negative values are encountered. Swap byte order in the main loop instead of beforehand. These changes don't get decrypt running in less than 16ms on my machine, but they do speed things up by an order of magnitude or so. Additional work to make the decryption asynchronous appears necessary.
1 parent 1670a237
...@@ -49,38 +49,38 @@ var AES, decrypt; ...@@ -49,38 +49,38 @@ var AES, decrypt;
49 */ 49 */
50 AES = function (key) { 50 AES = function (key) {
51 this._precompute(); 51 this._precompute();
52 52
53 var i, j, tmp, 53 var i, j, tmp,
54 encKey, decKey, 54 encKey, decKey,
55 sbox = this._tables[0][4], decTable = this._tables[1], 55 sbox = this._tables[0][4], decTable = this._tables[1],
56 keyLen = key.length, rcon = 1; 56 keyLen = key.length, rcon = 1;
57 57
58 if (keyLen !== 4 && keyLen !== 6 && keyLen !== 8) { 58 if (keyLen !== 4 && keyLen !== 6 && keyLen !== 8) {
59 throw new Error("Invalid aes key size"); 59 throw new Error("Invalid aes key size");
60 } 60 }
61 61
62 encKey = key.slice(0); 62 encKey = key.slice(0);
63 decKey = []; 63 decKey = [];
64 this._key = [encKey, decKey]; 64 this._key = [encKey, decKey];
65 65
66 // schedule encryption keys 66 // schedule encryption keys
67 for (i = keyLen; i < 4 * keyLen + 28; i++) { 67 for (i = keyLen; i < 4 * keyLen + 28; i++) {
68 tmp = encKey[i-1]; 68 tmp = encKey[i-1];
69 69
70 // apply sbox 70 // apply sbox
71 if (i%keyLen === 0 || (keyLen === 8 && i%keyLen === 4)) { 71 if (i%keyLen === 0 || (keyLen === 8 && i%keyLen === 4)) {
72 tmp = sbox[tmp>>>24]<<24 ^ sbox[tmp>>16&255]<<16 ^ sbox[tmp>>8&255]<<8 ^ sbox[tmp&255]; 72 tmp = sbox[tmp>>>24]<<24 ^ sbox[tmp>>16&255]<<16 ^ sbox[tmp>>8&255]<<8 ^ sbox[tmp&255];
73 73
74 // shift rows and add rcon 74 // shift rows and add rcon
75 if (i%keyLen === 0) { 75 if (i%keyLen === 0) {
76 tmp = tmp<<8 ^ tmp>>>24 ^ rcon<<24; 76 tmp = tmp<<8 ^ tmp>>>24 ^ rcon<<24;
77 rcon = rcon<<1 ^ (rcon>>7)*283; 77 rcon = rcon<<1 ^ (rcon>>7)*283;
78 } 78 }
79 } 79 }
80 80
81 encKey[i] = encKey[i-keyLen] ^ tmp; 81 encKey[i] = encKey[i-keyLen] ^ tmp;
82 } 82 }
83 83
84 // schedule decryption keys 84 // schedule decryption keys
85 for (j = 0; i; j++, i--) { 85 for (j = 0; i; j++, i--) {
86 tmp = encKey[j&3 ? i : i - 4]; 86 tmp = encKey[j&3 ? i : i - 4];
...@@ -124,119 +124,181 @@ AES.prototype = { ...@@ -124,119 +124,181 @@ AES.prototype = {
124 for (i = 0; i < 256; i++) { 124 for (i = 0; i < 256; i++) {
125 th[( d[i] = i<<1 ^ (i>>7)*283 )^i]=i; 125 th[( d[i] = i<<1 ^ (i>>7)*283 )^i]=i;
126 } 126 }
127 127
128 for (x = xInv = 0; !sbox[x]; x ^= x2 || 1, xInv = th[xInv] || 1) { 128 for (x = xInv = 0; !sbox[x]; x ^= x2 || 1, xInv = th[xInv] || 1) {
129 // Compute sbox 129 // Compute sbox
130 s = xInv ^ xInv<<1 ^ xInv<<2 ^ xInv<<3 ^ xInv<<4; 130 s = xInv ^ xInv<<1 ^ xInv<<2 ^ xInv<<3 ^ xInv<<4;
131 s = s>>8 ^ s&255 ^ 99; 131 s = s>>8 ^ s&255 ^ 99;
132 sbox[x] = s; 132 sbox[x] = s;
133 sboxInv[s] = x; 133 sboxInv[s] = x;
134 134
135 // Compute MixColumns 135 // Compute MixColumns
136 x8 = d[x4 = d[x2 = d[x]]]; 136 x8 = d[x4 = d[x2 = d[x]]];
137 tDec = x8*0x1010101 ^ x4*0x10001 ^ x2*0x101 ^ x*0x1010100; 137 tDec = x8*0x1010101 ^ x4*0x10001 ^ x2*0x101 ^ x*0x1010100;
138 tEnc = d[s]*0x101 ^ s*0x1010100; 138 tEnc = d[s]*0x101 ^ s*0x1010100;
139 139
140 for (i = 0; i < 4; i++) { 140 for (i = 0; i < 4; i++) {
141 encTable[i][x] = tEnc = tEnc<<24 ^ tEnc>>>8; 141 encTable[i][x] = tEnc = tEnc<<24 ^ tEnc>>>8;
142 decTable[i][s] = tDec = tDec<<24 ^ tDec>>>8; 142 decTable[i][s] = tDec = tDec<<24 ^ tDec>>>8;
143 } 143 }
144 } 144 }
145 145
146 // Compactify. Considerable speedup on Firefox. 146 // Compactify. Considerable speedup on Firefox.
147 for (i = 0; i < 5; i++) { 147 for (i = 0; i < 5; i++) {
148 encTable[i] = encTable[i].slice(0); 148 encTable[i] = encTable[i].slice(0);
149 decTable[i] = decTable[i].slice(0); 149 decTable[i] = decTable[i].slice(0);
150 } 150 }
151 }, 151 },
152 152
153 /** 153 /**
154 * Decrypt an array of 4 big-endian words. 154 * Decrypt 16 bytes, specified as four 32-bit words.
155 * @param {Array} data The ciphertext. 155 * @param encrypted0 {number} the first word to decrypt
156 * @param encrypted1 {number} the second word to decrypt
157 * @param encrypted2 {number} the third word to decrypt
158 * @param encrypted3 {number} the fourth word to decrypt
159 * @param out {Int32Array} the array to write the decrypted words
160 * into
161 * @param offset {number} the offset into the output array to start
162 * writing results
156 * @return {Array} The plaintext. 163 * @return {undefined} nothing; the four plaintext words are written into out starting at offset
157 */ 164 */
158 decrypt:function (input) { 165 decrypt:function (encrypted0, encrypted1, encrypted2, encrypted3, out, offset) {
159 if (input.length !== 4) {
160 throw new Error("Invalid aes block size");
161 }
162
163 var key = this._key[1], 166 var key = this._key[1],
164 // state variables a,b,c,d are loaded with pre-whitened data 167 // state variables a,b,c,d are loaded with pre-whitened data
165 a = input[0] ^ key[0], 168 a = encrypted0 ^ key[0],
166 b = input[3] ^ key[1], 169 b = encrypted3 ^ key[1],
167 c = input[2] ^ key[2], 170 c = encrypted2 ^ key[2],
168 d = input[1] ^ key[3], 171 d = encrypted1 ^ key[3],
169 a2, b2, c2, 172 a2, b2, c2,
170 173
171 nInnerRounds = key.length/4 - 2, 174 nInnerRounds = key.length / 4 - 2, // each round consumes 4 key words; NOTE(review): presumably key.length is 4*keyLen+28 (44 for a 4-word key, i.e. 9 inner rounds) - confirm against the key schedule
172 i, 175 i,
173 kIndex = 4, 176 kIndex = 4,
174 out = [0,0,0,0],
175 table = this._tables[1], 177 table = this._tables[1],
176 178
177 // load up the tables 179 // load up the tables
178 t0 = table[0], 180 table0 = table[0],
179 t1 = table[1], 181 table1 = table[1],
180 t2 = table[2], 182 table2 = table[2],
181 t3 = table[3], 183 table3 = table[3],
182 sbox = table[4]; 184 sbox = table[4];
183 185
184 // Inner rounds. Cribbed from OpenSSL. 186 // Inner rounds. Cribbed from OpenSSL.
185 for (i = 0; i < nInnerRounds; i++) { 187 for (i = 0; i < nInnerRounds; i++) {
186 a2 = t0[a>>>24] ^ t1[b>>16 & 255] ^ t2[c>>8 & 255] ^ t3[d & 255] ^ key[kIndex]; 188 a2 = table0[a>>>24] ^ table1[b>>16 & 255] ^ table2[c>>8 & 255] ^ table3[d & 255] ^ key[kIndex];
187 b2 = t0[b>>>24] ^ t1[c>>16 & 255] ^ t2[d>>8 & 255] ^ t3[a & 255] ^ key[kIndex + 1]; 189 b2 = table0[b>>>24] ^ table1[c>>16 & 255] ^ table2[d>>8 & 255] ^ table3[a & 255] ^ key[kIndex + 1];
188 c2 = t0[c>>>24] ^ t1[d>>16 & 255] ^ t2[a>>8 & 255] ^ t3[b & 255] ^ key[kIndex + 2]; 190 c2 = table0[c>>>24] ^ table1[d>>16 & 255] ^ table2[a>>8 & 255] ^ table3[b & 255] ^ key[kIndex + 2];
189 d = t0[d>>>24] ^ t1[a>>16 & 255] ^ t2[b>>8 & 255] ^ t3[c & 255] ^ key[kIndex + 3]; 191 d = table0[d>>>24] ^ table1[a>>16 & 255] ^ table2[b>>8 & 255] ^ table3[c & 255] ^ key[kIndex + 3];
190 kIndex += 4; 192 kIndex += 4;
191 a=a2; b=b2; c=c2; 193 a=a2; b=b2; c=c2;
192 } 194 }
193 195
194 // Last round. 196 // Last round.
195 for (i = 0; i < 4; i++) { 197 for (i = 0; i < 4; i++) {
196 out[3 & -i] = 198 out[(3 & -i) + offset] =
197 sbox[a>>>24 ]<<24 ^ 199 sbox[a>>>24 ]<<24 ^
198 sbox[b>>16 & 255]<<16 ^ 200 sbox[b>>16 & 255]<<16 ^
199 sbox[c>>8 & 255]<<8 ^ 201 sbox[c>>8 & 255]<<8 ^
200 sbox[d & 255] ^ 202 sbox[d & 255] ^
201 key[kIndex++]; 203 key[kIndex++];
202 a2=a; a=b; b=c; c=d; d=a2; 204 a2=a; a=b; b=c; c=d; d=a2;
203 } 205 }
204
205 return out;
206 } 206 }
207 }; 207 };
208 208
209 /**
210 * Decrypt bytes using AES-128 with CBC and PKCS#7 padding.
211 * @param encrypted {Uint8Array} the encrypted bytes
212 * @param key {Uint32Array} the bytes of the decryption key
213 * @param initVector {Uint32Array} the initialization vector (IV) to
214 * use for the first round of CBC.
215 * @return {Uint8Array} the decrypted bytes
216 *
217 * @see http://en.wikipedia.org/wiki/Advanced_Encryption_Standard
218 * @see http://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Block_Chaining_.28CBC.29
219 * @see https://tools.ietf.org/html/rfc2315
220 */
209 decrypt = function(encrypted, key, initVector) { 221 decrypt = function(encrypted, key, initVector) {
210 var 222 var
211 encryptedView = new DataView(encrypted.buffer), 223 // word-level access to the encrypted bytes
212 platformEndian = new Uint32Array(encrypted.byteLength / 4), 224 encrypted32 = new Int32Array(encrypted.buffer),
225
213 decipher = new AES(Array.prototype.slice.call(key)), 226 decipher = new AES(Array.prototype.slice.call(key)),
227
228 // byte and word-level access for the decrypted output
214 decrypted = new Uint8Array(encrypted.byteLength), 229 decrypted = new Uint8Array(encrypted.byteLength),
215 decryptedView = new DataView(decrypted.buffer), 230 decrypted32 = new Int32Array(decrypted.buffer),
216 decryptedBlock, 231
217 word, 232 // temporary variables for working with the IV, encrypted, and
218 byte; 233 // decrypted data
219 234 init0, init1, init2, init3,
220 // convert big-endian input to platform byte order for decryption 235 encrypted0, encrypted1, encrypted2, encrypted3,
221 for (byte = 0; byte < encrypted.byteLength; byte += 4) { 236 decrypted0, decrypted1, decrypted2, decrypted3,
222 platformEndian[byte >>> 2] = encryptedView.getUint32(byte); 237
223 } 238 // iteration variable
239 wordIx;
240
241 // pull out the words of the IV to ensure we don't modify the
242 // passed-in reference and easier access
243 init0 = initVector[0];
244 init1 = initVector[1];
245 init2 = initVector[2];
246 init3 = initVector[3];
247
224 // decrypt four word sequences, applying cipher-block chaining (CBC) 248 // decrypt four word sequences, applying cipher-block chaining (CBC)
225 // to each decrypted block 249 // to each decrypted block
226 for (word = 0; word < platformEndian.length; word += 4) { 250 for (wordIx = 0; wordIx < encrypted32.length; wordIx += 4) {
251 // convert big-endian (network order) words into little-endian
252 // (javascript order)
253 encrypted0 = (encrypted32[wordIx] << 24) |
254 ((encrypted32[wordIx] & 0xff00) << 8) |
255 ((encrypted32[wordIx] & 0xff0000) >> 8) |
256 (encrypted32[wordIx] >>> 24);
257 encrypted1 = (encrypted32[wordIx + 1] << 24) |
258 ((encrypted32[wordIx + 1] & 0xff00) << 8) |
259 ((encrypted32[wordIx + 1] & 0xff0000) >> 8) |
260 (encrypted32[wordIx + 1] >>> 24);
261 encrypted2 = (encrypted32[wordIx + 2] << 24) |
262 ((encrypted32[wordIx + 2] & 0xff00) << 8) |
263 ((encrypted32[wordIx + 2] & 0xff0000) >> 8) |
264 (encrypted32[wordIx + 2] >>> 24);
265 encrypted3 = (encrypted32[wordIx + 3] << 24) |
266 ((encrypted32[wordIx + 3] & 0xff00) << 8) |
267 ((encrypted32[wordIx + 3] & 0xff0000) >> 8) |
268 (encrypted32[wordIx + 3] >>> 24);
269
227 // decrypt the block 270 // decrypt the block
228 decryptedBlock = decipher.decrypt(platformEndian.subarray(word, word + 4)); 271 decipher.decrypt(encrypted0, encrypted1, encrypted2, encrypted3, decrypted32, wordIx);
229 272
230 // XOR with the IV, and restore network byte-order to obtain the 273 // XOR with the IV, and restore network byte-order to obtain the
231 // plaintext 274 // plaintext
232 byte = word << 2; 275 decrypted0 = decrypted32[wordIx] ^ init0;
233 decryptedView.setUint32(byte, decryptedBlock[0] ^ initVector[0]); 276 decrypted1 = decrypted32[wordIx + 1] ^ init1;
234 decryptedView.setUint32(byte + 4, decryptedBlock[1] ^ initVector[1]); 277 decrypted2 = decrypted32[wordIx + 2] ^ init2;
235 decryptedView.setUint32(byte + 8, decryptedBlock[2] ^ initVector[2]); 278 decrypted3 = decrypted32[wordIx + 3] ^ init3;
236 decryptedView.setUint32(byte + 12, decryptedBlock[3] ^ initVector[3]); 279
280 decrypted32[wordIx] = decrypted0 << 24 |
281 ((decrypted0 & 0xff00) << 8) |
282 ((decrypted0 & 0xff0000) >> 8) |
283 (decrypted0 >>> 24);
284 decrypted32[wordIx + 1] = decrypted1 << 24 |
285 ((decrypted1 & 0xff00) << 8) |
286 ((decrypted1 & 0xff0000) >> 8) |
287 (decrypted1 >>> 24);
288 decrypted32[wordIx + 2] = decrypted2 << 24 |
289 ((decrypted2 & 0xff00) << 8) |
290 ((decrypted2 & 0xff0000) >> 8) |
291 (decrypted2 >>> 24);
292 decrypted32[wordIx + 3] = decrypted3 << 24 |
293 ((decrypted3 & 0xff00) << 8) |
294 ((decrypted3 & 0xff0000) >> 8) |
295 (decrypted3 >>> 24);
237 296
238 // setup the IV for the next round 297 // setup the IV for the next round
239 initVector = platformEndian.subarray(word, word + 4); 298 init0 = encrypted0;
299 init1 = encrypted1;
300 init2 = encrypted2;
301 init3 = encrypted3;
240 } 302 }
241 303
242 // remove any padding 304 // remove any padding
......