Optimize decryption
For #218. Use TypedArrays instead of DataViews because DataView accesses weren't getting optimized very well. Use Int32Arrays instead of Uint32Arrays to avoid V8 deoptimization at runtime when negative values are encountered. Swap byte order in the main loop instead of beforehand. These changes don't get decrypt running in less than 16 ms on my machine, but they do speed things up by an order of magnitude or so. Additional work to make the decryption asynchronous appears necessary.
Showing
1 changed file
with
123 additions
and
61 deletions
... | @@ -49,38 +49,38 @@ var AES, decrypt; | ... | @@ -49,38 +49,38 @@ var AES, decrypt; |
49 | */ | 49 | */ |
50 | AES = function (key) { | 50 | AES = function (key) { |
51 | this._precompute(); | 51 | this._precompute(); |
52 | 52 | ||
53 | var i, j, tmp, | 53 | var i, j, tmp, |
54 | encKey, decKey, | 54 | encKey, decKey, |
55 | sbox = this._tables[0][4], decTable = this._tables[1], | 55 | sbox = this._tables[0][4], decTable = this._tables[1], |
56 | keyLen = key.length, rcon = 1; | 56 | keyLen = key.length, rcon = 1; |
57 | 57 | ||
58 | if (keyLen !== 4 && keyLen !== 6 && keyLen !== 8) { | 58 | if (keyLen !== 4 && keyLen !== 6 && keyLen !== 8) { |
59 | throw new Error("Invalid aes key size"); | 59 | throw new Error("Invalid aes key size"); |
60 | } | 60 | } |
61 | 61 | ||
62 | encKey = key.slice(0); | 62 | encKey = key.slice(0); |
63 | decKey = []; | 63 | decKey = []; |
64 | this._key = [encKey, decKey]; | 64 | this._key = [encKey, decKey]; |
65 | 65 | ||
66 | // schedule encryption keys | 66 | // schedule encryption keys |
67 | for (i = keyLen; i < 4 * keyLen + 28; i++) { | 67 | for (i = keyLen; i < 4 * keyLen + 28; i++) { |
68 | tmp = encKey[i-1]; | 68 | tmp = encKey[i-1]; |
69 | 69 | ||
70 | // apply sbox | 70 | // apply sbox |
71 | if (i%keyLen === 0 || (keyLen === 8 && i%keyLen === 4)) { | 71 | if (i%keyLen === 0 || (keyLen === 8 && i%keyLen === 4)) { |
72 | tmp = sbox[tmp>>>24]<<24 ^ sbox[tmp>>16&255]<<16 ^ sbox[tmp>>8&255]<<8 ^ sbox[tmp&255]; | 72 | tmp = sbox[tmp>>>24]<<24 ^ sbox[tmp>>16&255]<<16 ^ sbox[tmp>>8&255]<<8 ^ sbox[tmp&255]; |
73 | 73 | ||
74 | // shift rows and add rcon | 74 | // shift rows and add rcon |
75 | if (i%keyLen === 0) { | 75 | if (i%keyLen === 0) { |
76 | tmp = tmp<<8 ^ tmp>>>24 ^ rcon<<24; | 76 | tmp = tmp<<8 ^ tmp>>>24 ^ rcon<<24; |
77 | rcon = rcon<<1 ^ (rcon>>7)*283; | 77 | rcon = rcon<<1 ^ (rcon>>7)*283; |
78 | } | 78 | } |
79 | } | 79 | } |
80 | 80 | ||
81 | encKey[i] = encKey[i-keyLen] ^ tmp; | 81 | encKey[i] = encKey[i-keyLen] ^ tmp; |
82 | } | 82 | } |
83 | 83 | ||
84 | // schedule decryption keys | 84 | // schedule decryption keys |
85 | for (j = 0; i; j++, i--) { | 85 | for (j = 0; i; j++, i--) { |
86 | tmp = encKey[j&3 ? i : i - 4]; | 86 | tmp = encKey[j&3 ? i : i - 4]; |
... | @@ -124,119 +124,181 @@ AES.prototype = { | ... | @@ -124,119 +124,181 @@ AES.prototype = { |
124 | for (i = 0; i < 256; i++) { | 124 | for (i = 0; i < 256; i++) { |
125 | th[( d[i] = i<<1 ^ (i>>7)*283 )^i]=i; | 125 | th[( d[i] = i<<1 ^ (i>>7)*283 )^i]=i; |
126 | } | 126 | } |
127 | 127 | ||
128 | for (x = xInv = 0; !sbox[x]; x ^= x2 || 1, xInv = th[xInv] || 1) { | 128 | for (x = xInv = 0; !sbox[x]; x ^= x2 || 1, xInv = th[xInv] || 1) { |
129 | // Compute sbox | 129 | // Compute sbox |
130 | s = xInv ^ xInv<<1 ^ xInv<<2 ^ xInv<<3 ^ xInv<<4; | 130 | s = xInv ^ xInv<<1 ^ xInv<<2 ^ xInv<<3 ^ xInv<<4; |
131 | s = s>>8 ^ s&255 ^ 99; | 131 | s = s>>8 ^ s&255 ^ 99; |
132 | sbox[x] = s; | 132 | sbox[x] = s; |
133 | sboxInv[s] = x; | 133 | sboxInv[s] = x; |
134 | 134 | ||
135 | // Compute MixColumns | 135 | // Compute MixColumns |
136 | x8 = d[x4 = d[x2 = d[x]]]; | 136 | x8 = d[x4 = d[x2 = d[x]]]; |
137 | tDec = x8*0x1010101 ^ x4*0x10001 ^ x2*0x101 ^ x*0x1010100; | 137 | tDec = x8*0x1010101 ^ x4*0x10001 ^ x2*0x101 ^ x*0x1010100; |
138 | tEnc = d[s]*0x101 ^ s*0x1010100; | 138 | tEnc = d[s]*0x101 ^ s*0x1010100; |
139 | 139 | ||
140 | for (i = 0; i < 4; i++) { | 140 | for (i = 0; i < 4; i++) { |
141 | encTable[i][x] = tEnc = tEnc<<24 ^ tEnc>>>8; | 141 | encTable[i][x] = tEnc = tEnc<<24 ^ tEnc>>>8; |
142 | decTable[i][s] = tDec = tDec<<24 ^ tDec>>>8; | 142 | decTable[i][s] = tDec = tDec<<24 ^ tDec>>>8; |
143 | } | 143 | } |
144 | } | 144 | } |
145 | 145 | ||
146 | // Compactify. Considerable speedup on Firefox. | 146 | // Compactify. Considerable speedup on Firefox. |
147 | for (i = 0; i < 5; i++) { | 147 | for (i = 0; i < 5; i++) { |
148 | encTable[i] = encTable[i].slice(0); | 148 | encTable[i] = encTable[i].slice(0); |
149 | decTable[i] = decTable[i].slice(0); | 149 | decTable[i] = decTable[i].slice(0); |
150 | } | 150 | } |
151 | }, | 151 | }, |
152 | 152 | ||
153 | /** | 153 | /** |
154 | * Decrypt an array of 4 big-endian words. | 154 | * Decrypt 16 bytes, specified as four 32-bit words. |
155 | * @param {Array} data The ciphertext. | 155 | * @param encrypted0 {number} the first word to decrypt |
156 | * @param encrypted1 {number} the second word to decrypt | ||
157 | * @param encrypted2 {number} the third word to decrypt | ||
158 | * @param encrypted3 {number} the fourth word to decrypt | ||
159 | * @param out {Int32Array} the array to write the decrypted words | ||
160 | * into | ||
161 | * @param offset {number} the offset into the output array to start | ||
162 | * writing results | ||
156 | * @return {Array} The plaintext. | 163 | * @return {Array} The plaintext. |
157 | */ | 164 | */ |
158 | decrypt:function (input) { | 165 | decrypt:function (encrypted0, encrypted1, encrypted2, encrypted3, out, offset) { |
159 | if (input.length !== 4) { | ||
160 | throw new Error("Invalid aes block size"); | ||
161 | } | ||
162 | |||
163 | var key = this._key[1], | 166 | var key = this._key[1], |
164 | // state variables a,b,c,d are loaded with pre-whitened data | 167 | // state variables a,b,c,d are loaded with pre-whitened data |
165 | a = input[0] ^ key[0], | 168 | a = encrypted0 ^ key[0], |
166 | b = input[3] ^ key[1], | 169 | b = encrypted3 ^ key[1], |
167 | c = input[2] ^ key[2], | 170 | c = encrypted2 ^ key[2], |
168 | d = input[1] ^ key[3], | 171 | d = encrypted1 ^ key[3], |
169 | a2, b2, c2, | 172 | a2, b2, c2, |
170 | 173 | ||
171 | nInnerRounds = key.length/4 - 2, | 174 | nInnerRounds = key.length / 4 - 2, // key.length === 2 ? |
172 | i, | 175 | i, |
173 | kIndex = 4, | 176 | kIndex = 4, |
174 | out = [0,0,0,0], | ||
175 | table = this._tables[1], | 177 | table = this._tables[1], |
176 | 178 | ||
177 | // load up the tables | 179 | // load up the tables |
178 | t0 = table[0], | 180 | table0 = table[0], |
179 | t1 = table[1], | 181 | table1 = table[1], |
180 | t2 = table[2], | 182 | table2 = table[2], |
181 | t3 = table[3], | 183 | table3 = table[3], |
182 | sbox = table[4]; | 184 | sbox = table[4]; |
183 | 185 | ||
184 | // Inner rounds. Cribbed from OpenSSL. | 186 | // Inner rounds. Cribbed from OpenSSL. |
185 | for (i = 0; i < nInnerRounds; i++) { | 187 | for (i = 0; i < nInnerRounds; i++) { |
186 | a2 = t0[a>>>24] ^ t1[b>>16 & 255] ^ t2[c>>8 & 255] ^ t3[d & 255] ^ key[kIndex]; | 188 | a2 = table0[a>>>24] ^ table1[b>>16 & 255] ^ table2[c>>8 & 255] ^ table3[d & 255] ^ key[kIndex]; |
187 | b2 = t0[b>>>24] ^ t1[c>>16 & 255] ^ t2[d>>8 & 255] ^ t3[a & 255] ^ key[kIndex + 1]; | 189 | b2 = table0[b>>>24] ^ table1[c>>16 & 255] ^ table2[d>>8 & 255] ^ table3[a & 255] ^ key[kIndex + 1]; |
188 | c2 = t0[c>>>24] ^ t1[d>>16 & 255] ^ t2[a>>8 & 255] ^ t3[b & 255] ^ key[kIndex + 2]; | 190 | c2 = table0[c>>>24] ^ table1[d>>16 & 255] ^ table2[a>>8 & 255] ^ table3[b & 255] ^ key[kIndex + 2]; |
189 | d = t0[d>>>24] ^ t1[a>>16 & 255] ^ t2[b>>8 & 255] ^ t3[c & 255] ^ key[kIndex + 3]; | 191 | d = table0[d>>>24] ^ table1[a>>16 & 255] ^ table2[b>>8 & 255] ^ table3[c & 255] ^ key[kIndex + 3]; |
190 | kIndex += 4; | 192 | kIndex += 4; |
191 | a=a2; b=b2; c=c2; | 193 | a=a2; b=b2; c=c2; |
192 | } | 194 | } |
193 | 195 | ||
194 | // Last round. | 196 | // Last round. |
195 | for (i = 0; i < 4; i++) { | 197 | for (i = 0; i < 4; i++) { |
196 | out[3 & -i] = | 198 | out[(3 & -i) + offset] = |
197 | sbox[a>>>24 ]<<24 ^ | 199 | sbox[a>>>24 ]<<24 ^ |
198 | sbox[b>>16 & 255]<<16 ^ | 200 | sbox[b>>16 & 255]<<16 ^ |
199 | sbox[c>>8 & 255]<<8 ^ | 201 | sbox[c>>8 & 255]<<8 ^ |
200 | sbox[d & 255] ^ | 202 | sbox[d & 255] ^ |
201 | key[kIndex++]; | 203 | key[kIndex++]; |
202 | a2=a; a=b; b=c; c=d; d=a2; | 204 | a2=a; a=b; b=c; c=d; d=a2; |
203 | } | 205 | } |
204 | |||
205 | return out; | ||
206 | } | 206 | } |
207 | }; | 207 | }; |
208 | 208 | ||
209 | /** | ||
210 | * Decrypt bytes using AES-128 with CBC and PKCS#7 padding. | ||
211 | * @param encrypted {Uint8Array} the encrypted bytes | ||
212 | * @param key {Uint32Array} the bytes of the decryption key | ||
213 | * @param initVector {Uint32Array} the initialization vector (IV) to | ||
214 | * use for the first round of CBC. | ||
215 | * @return {Uint8Array} the decrypted bytes | ||
216 | * | ||
217 | * @see http://en.wikipedia.org/wiki/Advanced_Encryption_Standard | ||
218 | * @see http://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Block_Chaining_.28CBC.29 | ||
219 | * @see https://tools.ietf.org/html/rfc2315 | ||
220 | */ | ||
209 | decrypt = function(encrypted, key, initVector) { | 221 | decrypt = function(encrypted, key, initVector) { |
210 | var | 222 | var |
211 | encryptedView = new DataView(encrypted.buffer), | 223 | // word-level access to the encrypted bytes |
212 | platformEndian = new Uint32Array(encrypted.byteLength / 4), | 224 | encrypted32 = new Int32Array(encrypted.buffer), |
225 | |||
213 | decipher = new AES(Array.prototype.slice.call(key)), | 226 | decipher = new AES(Array.prototype.slice.call(key)), |
227 | |||
228 | // byte and word-level access for the decrypted output | ||
214 | decrypted = new Uint8Array(encrypted.byteLength), | 229 | decrypted = new Uint8Array(encrypted.byteLength), |
215 | decryptedView = new DataView(decrypted.buffer), | 230 | decrypted32 = new Int32Array(decrypted.buffer), |
216 | decryptedBlock, | 231 | |
217 | word, | 232 | // temporary variables for working with the IV, encrypted, and |
218 | byte; | 233 | // decrypted data |
219 | 234 | init0, init1, init2, init3, | |
220 | // convert big-endian input to platform byte order for decryption | 235 | encrypted0, encrypted1, encrypted2, encrypted3, |
221 | for (byte = 0; byte < encrypted.byteLength; byte += 4) { | 236 | decrypted0, decrypted1, decrypted2, decrypted3, |
222 | platformEndian[byte >>> 2] = encryptedView.getUint32(byte); | 237 | |
223 | } | 238 | // iteration variable |
239 | wordIx; | ||
240 | |||
241 | // pull out the words of the IV to ensure we don't modify the | ||
242 | // passed-in reference and easier access | ||
243 | init0 = initVector[0]; | ||
244 | init1 = initVector[1]; | ||
245 | init2 = initVector[2]; | ||
246 | init3 = initVector[3]; | ||
247 | |||
224 | // decrypt four word sequences, applying cipher-block chaining (CBC) | 248 | // decrypt four word sequences, applying cipher-block chaining (CBC) |
225 | // to each decrypted block | 249 | // to each decrypted block |
226 | for (word = 0; word < platformEndian.length; word += 4) { | 250 | for (wordIx = 0; wordIx < encrypted32.length; wordIx += 4) { |
251 | // convert big-endian (network order) words into little-endian | ||
252 | // (javascript order) | ||
253 | encrypted0 = (encrypted32[wordIx] << 24) | | ||
254 | ((encrypted32[wordIx] & 0xff00) << 8) | | ||
255 | ((encrypted32[wordIx] & 0xff0000) >> 8) | | ||
256 | (encrypted32[wordIx] >>> 24); | ||
257 | encrypted1 = (encrypted32[wordIx + 1] << 24) | | ||
258 | ((encrypted32[wordIx + 1] & 0xff00) << 8) | | ||
259 | ((encrypted32[wordIx + 1] & 0xff0000) >> 8) | | ||
260 | (encrypted32[wordIx + 1] >>> 24); | ||
261 | encrypted2 = (encrypted32[wordIx + 2] << 24) | | ||
262 | ((encrypted32[wordIx + 2] & 0xff00) << 8) | | ||
263 | ((encrypted32[wordIx + 2] & 0xff0000) >> 8) | | ||
264 | (encrypted32[wordIx + 2] >>> 24); | ||
265 | encrypted3 = (encrypted32[wordIx + 3] << 24) | | ||
266 | ((encrypted32[wordIx + 3] & 0xff00) << 8) | | ||
267 | ((encrypted32[wordIx + 3] & 0xff0000) >> 8) | | ||
268 | (encrypted32[wordIx + 3] >>> 24); | ||
269 | |||
227 | // decrypt the block | 270 | // decrypt the block |
228 | decryptedBlock = decipher.decrypt(platformEndian.subarray(word, word + 4)); | 271 | decipher.decrypt(encrypted0, encrypted1, encrypted2, encrypted3, decrypted32, wordIx); |
229 | 272 | ||
230 | // XOR with the IV, and restore network byte-order to obtain the | 273 | // XOR with the IV, and restore network byte-order to obtain the |
231 | // plaintext | 274 | // plaintext |
232 | byte = word << 2; | 275 | decrypted0 = decrypted32[wordIx] ^ init0; |
233 | decryptedView.setUint32(byte, decryptedBlock[0] ^ initVector[0]); | 276 | decrypted1 = decrypted32[wordIx + 1] ^ init1; |
234 | decryptedView.setUint32(byte + 4, decryptedBlock[1] ^ initVector[1]); | 277 | decrypted2 = decrypted32[wordIx + 2] ^ init2; |
235 | decryptedView.setUint32(byte + 8, decryptedBlock[2] ^ initVector[2]); | 278 | decrypted3 = decrypted32[wordIx + 3] ^ init3; |
236 | decryptedView.setUint32(byte + 12, decryptedBlock[3] ^ initVector[3]); | 279 | |
280 | decrypted32[wordIx] = decrypted0 << 24 | | ||
281 | ((decrypted0 & 0xff00) << 8) | | ||
282 | ((decrypted0 & 0xff0000) >> 8) | | ||
283 | (decrypted0 >>> 24); | ||
284 | decrypted32[wordIx + 1] = decrypted1 << 24 | | ||
285 | ((decrypted1 & 0xff00) << 8) | | ||
286 | ((decrypted1 & 0xff0000) >> 8) | | ||
287 | (decrypted1 >>> 24); | ||
288 | decrypted32[wordIx + 2] = decrypted2 << 24 | | ||
289 | ((decrypted2 & 0xff00) << 8) | | ||
290 | ((decrypted2 & 0xff0000) >> 8) | | ||
291 | (decrypted2 >>> 24); | ||
292 | decrypted32[wordIx + 3] = decrypted3 << 24 | | ||
293 | ((decrypted3 & 0xff00) << 8) | | ||
294 | ((decrypted3 & 0xff0000) >> 8) | | ||
295 | (decrypted3 >>> 24); | ||
237 | 296 | ||
238 | // setup the IV for the next round | 297 | // setup the IV for the next round |
239 | initVector = platformEndian.subarray(word, word + 4); | 298 | init0 = encrypted0; |
299 | init1 = encrypted1; | ||
300 | init2 = encrypted2; | ||
301 | init3 = encrypted3; | ||
240 | } | 302 | } |
241 | 303 | ||
242 | // remove any padding | 304 | // remove any padding | ... | ... |
-
Please register or sign in to post a comment