00001
00002
00003
00004 #include "pch.h"
00005 #include "config.h"
00006
00007 #include "vmac.h"
00008 #include "cpu.h"
00009 #include "argnames.h"
00010 #include "secblock.h"
00011
00012 #if CRYPTOPP_MSC_VERSION
00013 # pragma warning(disable: 4731)
00014 #endif
00015
00016 NAMESPACE_BEGIN(CryptoPP)
00017
00018 #if defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
00019 #include <intrin.h>
00020 #endif
00021
// Use a native 128-bit integer type for the portable arithmetic when one is
// available and the dedicated x64 assembly path is not being used.
#define VMAC_BOOL_WORD128 (defined(CRYPTOPP_WORD128_AVAILABLE) && !defined(CRYPTOPP_X64_ASM_AVAILABLE))
#ifdef __BORLANDC__
#define const // Turbo C++ 2006 workaround
#endif
// p64 = 2^64 - 257, the prime modulus used by the L3 inner-product hash.
static const word64 p64 = W64LIT(0xfffffffffffffeff);
// Masks selecting the low 62, 63 and 64 bits respectively.
static const word64 m62 = W64LIT(0x3fffffffffffffff);
static const word64 m63 = W64LIT(0x7fffffffffffffff);
static const word64 m64 = W64LIT(0xffffffffffffffff);
// Mask applied to each word of the polynomial-hash key: clears the top
// three bits of each 32-bit half.
static const word64 mpoly = W64LIT(0x1fffffff1fffffff);
#ifdef __BORLANDC__
#undef const
#endif
#if VMAC_BOOL_WORD128
#ifdef __powerpc__
// NOTE(review): macro instead of a static — presumably some PowerPC
// compilers mishandle this 128-bit static initializer; confirm before
// changing.
#define m126 ((word128(m62)<<64)|m64)
#else
// Mask selecting the low 126 bits of a word128 (NH output before the
// polynomial step).
static const word128 m126 = (word128(m62)<<64)|m64;
#endif
#endif
00042
00043 void VMAC_Base::UncheckedSetKey(const byte *userKey, unsigned int keylength, const NameValuePairs ¶ms)
00044 {
00045 int digestLength = params.GetIntValueWithDefault(Name::DigestSize(), DefaultDigestSize());
00046 if (digestLength != 8 && digestLength != 16)
00047 throw InvalidArgument("VMAC: DigestSize must be 8 or 16");
00048 m_is128 = digestLength == 16;
00049
00050 m_L1KeyLength = params.GetIntValueWithDefault(Name::L1KeyLength(), 128);
00051 if (m_L1KeyLength <= 0 || m_L1KeyLength % 128 != 0)
00052 throw InvalidArgument("VMAC: L1KeyLength must be a positive multiple of 128");
00053
00054 AllocateBlocks();
00055
00056 BlockCipher &cipher = AccessCipher();
00057 cipher.SetKey(userKey, keylength, params);
00058 const unsigned int blockSize = cipher.BlockSize();
00059 const unsigned int blockSizeInWords = blockSize / sizeof(word64);
00060 SecBlock<word64> out(blockSizeInWords);
00061 SecByteBlock in;
00062 in.CleanNew(blockSize);
00063 size_t i;
00064
00065
00066 in[0] = 0x80;
00067 cipher.AdvancedProcessBlocks(in, NULL, (byte *)m_nhKey(), m_nhKeySize()*sizeof(word64), cipher.BT_InBlockIsCounter);
00068 ConditionalByteReverse<word64>(BIG_ENDIAN_ORDER, m_nhKey(), m_nhKey(), m_nhKeySize()*sizeof(word64));
00069
00070
00071 in[0] = 0xC0;
00072 in[15] = 0;
00073 for (i = 0; i <= (size_t)m_is128; i++)
00074 {
00075 cipher.ProcessBlock(in, out.BytePtr());
00076 m_polyState()[i*4+2] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()) & mpoly;
00077 m_polyState()[i*4+3] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8) & mpoly;
00078 in[15]++;
00079 }
00080
00081
00082 in[0] = 0xE0;
00083 in[15] = 0;
00084 word64 *l3Key = m_l3Key();
00085 for (i = 0; i <= (size_t)m_is128; i++)
00086 do
00087 {
00088 cipher.ProcessBlock(in, out.BytePtr());
00089 l3Key[i*2+0] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr());
00090 l3Key[i*2+1] = GetWord<word64>(true, BIG_ENDIAN_ORDER, out.BytePtr()+8);
00091 in[15]++;
00092 } while ((l3Key[i*2+0] >= p64) || (l3Key[i*2+1] >= p64));
00093
00094 m_padCached = false;
00095 size_t nonceLength;
00096 const byte *nonce = GetIVAndThrowIfInvalid(params, nonceLength);
00097 Resynchronize(nonce, (int)nonceLength);
00098 }
00099
00100 void VMAC_Base::GetNextIV(RandomNumberGenerator &rng, byte *IV)
00101 {
00102 SimpleKeyingInterface::GetNextIV(rng, IV);
00103 IV[0] &= 0x7f;
00104 }
00105
/// \brief Set the nonce and prepare for a new message.
/// \param nonce the nonce bytes
/// \param len nonce length; validated by ThrowIfInvalidIVLength()
/// \details The nonce is zero-padded on the left to IVSize() and encrypted
///   to produce the whitening pad added to the tag in TruncatedFinal().
void VMAC_Base::Resynchronize(const byte *nonce, int len)
{
	size_t length = ThrowIfInvalidIVLength(len);
	size_t s = IVSize();
	byte *storedNonce = m_nonce();

	if (m_is128)
	{
		// 128-bit tag: zero-pad the nonce to the block size and encrypt it
		// to produce the full 16-byte pad. No caching in this mode.
		memset(storedNonce, 0, s-length);
		memcpy(storedNonce+s-length, nonce, length);
		AccessCipher().ProcessBlock(storedNonce, m_pad());
	}
	else
	{
		// 64-bit tag: the low bit of the last nonce byte only selects which
		// pad half is used at finalization, so the pad is computed from the
		// nonce with that bit cleared and can be reused when only that bit
		// changes between consecutive nonces.
		if (m_padCached && (storedNonce[s-1] | 1) == (nonce[length-1] | 1))
		{
			// The cached pad remains valid only if every other nonce byte
			// matches the stored, zero-padded nonce.
			m_padCached = VerifyBufsEqual(storedNonce+s-length, nonce, length-1);
			for (size_t i=0; m_padCached && i<s-length; i++)
				m_padCached = (storedNonce[i] == 0);
		}
		if (!m_padCached)
		{
			// Recompute the pad from the nonce with the low bit masked off.
			memset(storedNonce, 0, s-length);
			memcpy(storedNonce+s-length, nonce, length-1);
			storedNonce[s-1] = nonce[length-1] & 0xfe;
			AccessCipher().ProcessBlock(storedNonce, m_pad());
			m_padCached = true;
		}
		// Keep the true last byte (including the low bit) for TruncatedFinal.
		storedNonce[s-1] = nonce[length-1];
	}
	m_isFirstBlock = true;
	Restart();
}
00139
/// \brief Base-class hashing hook; intentionally unimplemented.
/// \details VMAC performs all hashing through VHASH_Update, so this entry
///   point (presumably required by the iterated-hash base class — confirm
///   against vmac.h) must never be reached.
/// \throws NotImplemented always
void VMAC_Base::HashEndianCorrectedBlock(const word64 *data)
{
	CRYPTOPP_UNUSED(data);
	assert(false);	// not reachable through the VMAC code paths
	throw NotImplemented("VMAC: HashEndianCorrectedBlock is not implemented");
}
00146
/// \brief Provides the input alignment that gives best performance.
/// \return 16 when the SSE2 assembly path is compiled in and the CPU
///   supports SSE2; otherwise the underlying cipher's preferred alignment.
unsigned int VMAC_Base::OptimalDataAlignment() const
{
	return
#if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_VMAC_ASM)
		HasSSE2() ? 16 :
#endif
		GetCipher().OptimalDataAlignment();
}
00155
#if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif
// MMX/SSE2 x86 implementation of one VHASH tag half: the NH stage over the
// message followed by the polynomial accumulation. It computes the same
// function as VHASH_Update_Template, which serves as the readable
// reference for this routine.
//   data                    - endian-corrected message words
//   blocksRemainingInWord64 - number of 64-bit words to process
//   tagPart                 - 0 or 1, selecting the NH/poly key pair
void
#ifdef __GNUC__
__attribute__ ((noinline))
#endif
VMAC_Base::VHASH_Update_SSE2(const word64 *data, size_t blocksRemainingInWord64, int tagPart)
{
	const word64 *nhK = m_nhKey();
	word64 *polyS = m_polyState();
	word32 L1KeyLength = m_L1KeyLength;

	// Silence "unused" warnings on paths where the values are referenced
	// only through inline-assembly operands.
	CRYPTOPP_UNUSED(data); CRYPTOPP_UNUSED(tagPart); CRYPTOPP_UNUSED(L1KeyLength);
	CRYPTOPP_UNUSED(blocksRemainingInWord64);

#ifdef __GNUC__
	word32 temp;
	__asm__ __volatile__
	(
	// ebx is the PIC register under GCC: save it, then load L1KeyLength.
	AS2( mov %%ebx, %0)
	AS2( mov %1, %%ebx)
	INTEL_NOPREFIX
#else
#if _MSC_VER < 1300 || defined(__INTEL_COMPILER)
	char isFirstBlock = m_isFirstBlock;
	AS2( mov ebx, [L1KeyLength])
	AS2( mov dl, [isFirstBlock])
#else
	AS2( mov ecx, this)
	AS2( mov ebx, [ecx+m_L1KeyLength])
	AS2( mov dl, [ecx+m_isFirstBlock])
#endif
	// eax = byte offset of this tag part's key/state; edi -> NH key,
	// eax -> polynomial state.
	AS2( mov eax, tagPart)
	AS2( shl eax, 4)
	AS2( mov edi, nhK)
	AS2( add edi, eax)
	AS2( add eax, eax)
	AS2( add eax, polyS)

	AS2( mov esi, data)
	AS2( mov ecx, blocksRemainingInWord64)
#endif

	// ebx = L1 chunk size in 64-bit words (L1KeyLength bytes / 8).
	AS2( shr ebx, 3)
#if CRYPTOPP_BOOL_X32
	AS_PUSH_IF86( bp)
	AS2( sub esp, 24)
#else
	AS_PUSH_IF86( bp)
	AS2( sub esp, 12)
#endif
	// Outer loop (label 4): one iteration per L1 chunk; ebp = number of
	// words to consume this pass (at most one chunk).
	ASL(4)
	AS2( mov ebp, ebx)
	AS2( cmp ecx, ebx)
	AS2( cmovl ebp, ecx)
	AS2( sub ecx, ebp)
	AS2( lea ebp, [edi+8*ebp])
	// First NH pair, handled before entering the inner loop.
	AS2( movq mm6, [esi])
	AS2( paddq mm6, [edi])
	AS2( movq mm5, [esi+8])
	AS2( paddq mm5, [edi+8])
	AS2( add esi, 16)
	AS2( add edi, 16)
	AS2( movq mm4, mm6)
	ASS( pshufw mm2, mm6, 1, 0, 3, 2)
	AS2( pmuludq mm6, mm5)
	ASS( pshufw mm3, mm5, 1, 0, 3, 2)
	AS2( pmuludq mm5, mm2)
	AS2( pmuludq mm2, mm3)
	AS2( pmuludq mm3, mm4)
	AS2( pxor mm7, mm7)
	AS2( movd [esp], mm6)
	AS2( psrlq mm6, 32)
#if CRYPTOPP_BOOL_X32
	AS2( movd [esp+8], mm5)
#else
	AS2( movd [esp+4], mm5)
#endif
	AS2( psrlq mm5, 32)
	AS2( cmp edi, ebp)
	ASJ( je, 1, f)
	// Inner NH loop (label 0): one 64x64 multiply-accumulate per pair of
	// message words, built from four 32x32 pmuludq partial products.
	ASL(0)
	AS2( movq mm0, [esi])
	AS2( paddq mm0, [edi])
	AS2( movq mm1, [esi+8])
	AS2( paddq mm1, [edi+8])
	AS2( add esi, 16)
	AS2( add edi, 16)
	AS2( movq mm4, mm0)
	AS2( paddq mm5, mm2)
	ASS( pshufw mm2, mm0, 1, 0, 3, 2)
	AS2( pmuludq mm0, mm1)
#if CRYPTOPP_BOOL_X32
	AS2( movd [esp+16], mm3)
#else
	AS2( movd [esp+8], mm3)
#endif
	AS2( psrlq mm3, 32)
	AS2( paddq mm5, mm3)
	ASS( pshufw mm3, mm1, 1, 0, 3, 2)
	AS2( pmuludq mm1, mm2)
	AS2( pmuludq mm2, mm3)
	AS2( pmuludq mm3, mm4)
	AS2( movd mm4, [esp])
	AS2( paddq mm7, mm4)
#if CRYPTOPP_BOOL_X32
	AS2( movd mm4, [esp+8])
	AS2( paddq mm6, mm4)
	AS2( movd mm4, [esp+16])
#else
	AS2( movd mm4, [esp+4])
	AS2( paddq mm6, mm4)
	AS2( movd mm4, [esp+8])
#endif
	AS2( paddq mm6, mm4)
	AS2( movd [esp], mm0)
	AS2( psrlq mm0, 32)
	AS2( paddq mm6, mm0)
#if CRYPTOPP_BOOL_X32
	AS2( movd [esp+8], mm1)
#else
	AS2( movd [esp+4], mm1)
#endif
	AS2( psrlq mm1, 32)
	AS2( paddq mm5, mm1)
	AS2( cmp edi, ebp)
	ASJ( jne, 0, b)
	// Inner loop done (label 1): fold the outstanding partial products.
	ASL(1)
	AS2( paddq mm5, mm2)
#if CRYPTOPP_BOOL_X32
	AS2( movd [esp+16], mm3)
#else
	AS2( movd [esp+8], mm3)
#endif
	AS2( psrlq mm3, 32)
	AS2( paddq mm5, mm3)
	AS2( movd mm4, [esp])
	AS2( paddq mm7, mm4)
#if CRYPTOPP_BOOL_X32
	AS2( movd mm4, [esp+8])
	AS2( paddq mm6, mm4)
	AS2( movd mm4, [esp+16])
#else
	AS2( movd mm4, [esp+4])
	AS2( paddq mm6, mm4)
	AS2( movd mm4, [esp+8])
#endif
	AS2( paddq mm6, mm4)
	// Rewind edi to the start of the NH key for the next chunk.
	AS2( lea ebp, [8*ebx])
	AS2( sub edi, ebp)

	AS2( movd [esp], mm7)
	AS2( psrlq mm7, 32)
	AS2( paddq mm6, mm7)
#if CRYPTOPP_BOOL_X32
	AS2( movd [esp+8], mm6)
#else
	AS2( movd [esp+4], mm6)
#endif
	AS2( psrlq mm6, 32)
	AS2( paddq mm5, mm6)
	// Clear the top two bits of the NH result (mask to 126 bits).
	AS2( psllq mm5, 2)
	AS2( psrlq mm5, 2)

// Aliases for the 32-bit words of the polynomial accumulator (a0..a3) and
// key (k0..k3) inside polyS, addressed through eax.
#define a0 [eax+2*4]
#define a1 [eax+3*4]
#define a2 [eax+0*4]
#define a3 [eax+1*4]
#define k0 [eax+2*8+2*4]
#define k1 [eax+2*8+3*4]
#define k2 [eax+2*8+0*4]
#define k3 [eax+2*8+1*4]
	// dl = m_isFirstBlock: the very first chunk only adds the key to the
	// NH output instead of performing a full polynomial step.
	AS2( test dl, dl)
	ASJ( jz, 2, f)
	AS2( movd mm1, k0)
	AS2( movd mm0, [esp])
	AS2( paddq mm0, mm1)
	AS2( movd a0, mm0)
	AS2( psrlq mm0, 32)
	AS2( movd mm1, k1)
#if CRYPTOPP_BOOL_X32
	AS2( movd mm2, [esp+8])
#else
	AS2( movd mm2, [esp+4])
#endif
	AS2( paddq mm1, mm2)
	AS2( paddq mm0, mm1)
	AS2( movd a1, mm0)
	AS2( psrlq mm0, 32)
	AS2( paddq mm5, k2)
	AS2( paddq mm0, mm5)
	AS2( movq a2, mm0)
	AS2( xor edx, edx)
	ASJ( jmp, 3, f)
	// Polynomial step (label 2): state = state*key + NH output, reduced
	// modulo 2^127-1 (schoolbook 32-bit multiply; see the VMAC paper).
	ASL(2)
	AS2( movd mm0, a3)
	AS2( movq mm4, mm0)
	AS2( pmuludq mm0, k3)
	AS2( movd mm1, a0)
	AS2( pmuludq mm1, k2)
	AS2( movd mm2, a1)
	AS2( movd mm6, k1)
	AS2( pmuludq mm2, mm6)
	AS2( movd mm3, a2)
	AS2( psllq mm0, 1)
	AS2( paddq mm0, mm5)
	AS2( movq mm5, mm3)
	AS2( movd mm7, k0)
	AS2( pmuludq mm3, mm7)
	AS2( pmuludq mm4, mm7)
	AS2( pmuludq mm5, mm6)
	AS2( paddq mm0, mm1)
	AS2( movd mm1, a1)
	AS2( paddq mm4, mm5)
	AS2( movq mm5, mm1)
	AS2( pmuludq mm1, k2)
	AS2( paddq mm0, mm2)
	AS2( movd mm2, a0)
	AS2( paddq mm0, mm3)
	AS2( movq mm3, mm2)
	AS2( pmuludq mm2, k3)
	AS2( pmuludq mm3, mm7)
#if CRYPTOPP_BOOL_X32
	AS2( movd [esp+16], mm0)
#else
	AS2( movd [esp+8], mm0)
#endif
	AS2( psrlq mm0, 32)
	AS2( pmuludq mm7, mm5)
	AS2( pmuludq mm5, k3)
	AS2( paddq mm0, mm1)
	AS2( movd mm1, a2)
	AS2( pmuludq mm1, k2)
	AS2( paddq mm0, mm2)
	AS2( paddq mm0, mm4)
	AS2( movq mm4, mm0)
	AS2( movd mm2, a3)
	AS2( pmuludq mm2, mm6)
	AS2( pmuludq mm6, a0)
	AS2( psrlq mm0, 31)
	AS2( paddq mm0, mm3)
	AS2( movd mm3, [esp])
	AS2( paddq mm0, mm3)
	AS2( movd mm3, a2)
	AS2( pmuludq mm3, k3)
	AS2( paddq mm5, mm1)
	AS2( movd mm1, a3)
	AS2( pmuludq mm1, k2)
	AS2( paddq mm5, mm2)
#if CRYPTOPP_BOOL_X32
	AS2( movd mm2, [esp+8])
#else
	AS2( movd mm2, [esp+4])
#endif
	AS2( psllq mm5, 1)
	AS2( paddq mm0, mm5)
	AS2( psllq mm4, 33)
	AS2( movd a0, mm0)
	AS2( psrlq mm0, 32)
	AS2( paddq mm6, mm7)
#if CRYPTOPP_BOOL_X32
	AS2( movd mm7, [esp+16])
#else
	AS2( movd mm7, [esp+8])
#endif
	AS2( paddq mm0, mm6)
	AS2( paddq mm0, mm2)
	AS2( paddq mm3, mm1)
	AS2( psllq mm3, 1)
	AS2( paddq mm0, mm3)
	AS2( psrlq mm4, 1)
	AS2( movd a1, mm0)
	AS2( psrlq mm0, 32)
	AS2( por mm4, mm7)
	AS2( paddq mm0, mm4)
	AS2( movq a2, mm0)
#undef a0
#undef a1
#undef a2
#undef a3
#undef k0
#undef k1
#undef k2
#undef k3

	// Label 3: loop back while message words remain (ecx != 0).
	ASL(3)
	AS2( test ecx, ecx)
	ASJ( jnz, 4, b)
#if CRYPTOPP_BOOL_X32
	AS2( add esp, 24)
#else
	AS2( add esp, 12)
#endif
	AS_POP_IF86( bp)
	AS1( emms)
#ifdef __GNUC__
	ATT_PREFIX
	// Restore ebx (saved in temp at entry).
	AS2( mov %0, %%ebx)
	: "=m" (temp)
	: "m" (L1KeyLength), "c" (blocksRemainingInWord64), "S" (data), "D" (nhK+tagPart*2), "d" (m_isFirstBlock), "a" (polyS+tagPart*4)
	: "memory", "cc"
	);
#endif
}
#endif
00463
#if VMAC_BOOL_WORD128
// Native 128-bit integer arithmetic:
//   DeclareNH(a)         - declare an NH accumulator
//   MUL64(rh,rl,i1,i2)   - full 64x64 -> 128 multiply
//   AccumulateNH(a,b,c)  - a += b*c, 128-bit accumulation
//   Multiply128(r,i1,i2) - r = low64(i1) * low64(i2)
#define DeclareNH(a) word128 a=0
#define MUL64(rh,rl,i1,i2) {word128 p = word128(i1)*(i2); rh = word64(p>>64); rl = word64(p);}
#define AccumulateNH(a, b, c) a += word128(b)*(c)
#define Multiply128(r, i1, i2) r = word128(word64(i1)) * word64(i2)
#else
// MUL32: 32x32 -> 64 multiply; __emulu lets MSVC emit a single MUL.
#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
#define MUL32(a, b) __emulu(word32(a), word32(b))
#else
#define MUL32(a, b) ((word64)((word32)(a)) * (word32)(b))
#endif
#if defined(CRYPTOPP_X64_ASM_AVAILABLE)
// x86-64 inline assembly; NH accumulators are kept in two 64-bit halves.
#define DeclareNH(a) word64 a##0=0, a##1=0
#define MUL64(rh,rl,i1,i2) asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "g"(i2) : "cc");
#define AccumulateNH(a, b, c) asm ("mulq %3; addq %%rax, %0; adcq %%rdx, %1" : "+r"(a##0), "+r"(a##1) : "a"(b), "g"(c) : "%rdx", "cc");
#define ADD128(rh,rl,ih,il) asm ("addq %3, %1; adcq %2, %0" : "+r"(rh),"+r"(rl) : "r"(ih),"r"(il) : "cc");
#elif defined(_MSC_VER) && !CRYPTOPP_BOOL_SLOW_WORD64
// MSVC x64 _umul128 intrinsic.
#define DeclareNH(a) word64 a##0=0, a##1=0
#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
#define AccumulateNH(a, b, c) {\
word64 ph, pl;\
pl = _umul128(b,c,&ph);\
a##0 += pl;\
a##1 += ph + (a##0 < pl);}
#else
// Pure 32-bit fallback: NH accumulators use three 64-bit words so carries
// can be propagated lazily (see the VMAC_BOOL_32BIT path in the hash).
#define VMAC_BOOL_32BIT 1
#define DeclareNH(a) word64 a##0=0, a##1=0, a##2=0
#define MUL64(rh,rl,i1,i2) \
{ word64 _i1 = (i1), _i2 = (i2); \
word64 m1= MUL32(_i1,_i2>>32); \
word64 m2= MUL32(_i1>>32,_i2); \
rh = MUL32(_i1>>32,_i2>>32); \
rl = MUL32(_i1,_i2); \
ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
}
#define AccumulateNH(a, b, c) {\
word64 p = MUL32(b, c);\
a##1 += word32((p)>>32);\
a##0 += word32(p);\
p = MUL32((b)>>32, c);\
a##2 += word32((p)>>32);\
a##1 += word32(p);\
p = MUL32((b)>>32, (c)>>32);\
a##2 += p;\
p = MUL32(b, (c)>>32);\
a##1 += word32(p);\
a##2 += word32(p>>32);}
#endif
#endif
#ifndef VMAC_BOOL_32BIT
#define VMAC_BOOL_32BIT 0
#endif
// Portable 128-bit add-with-carry, used when no asm version was defined.
#ifndef ADD128
#define ADD128(rh,rl,ih,il) \
{ word64 _il = (il); \
(rl) += (_il); \
(rh) += (ih) + ((rl) < (_il)); \
}
#endif
00524
// Portable VHASH implementation: the NH stage compresses L1KeyLength-byte
// chunks of the message, and each compressed value is folded into a
// polynomial evaluated modulo 2^127-1. When T_128BitTag, two independent
// instances (keys A and B) are evaluated in a single pass.
#if !(defined(_MSC_VER) && _MSC_VER < 1300)
template <bool T_128BitTag>
#endif
void VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64)
{
// One NH step: accumulate (d0+key)*(d1+key') into nhA and, for 128-bit
// tags, the same data against the key shifted by two words into nhB.
#define INNER_LOOP_ITERATION(j) {\
	word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\
	word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\
	AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\
	if (T_128BitTag)\
		AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\
	}

#if (defined(_MSC_VER) && _MSC_VER < 1300)
	bool T_128BitTag = m_is128;	// runtime flag where the bool template parameter is unavailable
#endif
	size_t L1KeyLengthInWord64 = m_L1KeyLength / 8;
	size_t innerLoopEnd = L1KeyLengthInWord64;
	const word64 *nhK = m_nhKey();
	word64 *polyS = m_polyState();
	bool isFirstBlock = true;
	size_t i;

#if !VMAC_BOOL_32BIT
#if VMAC_BOOL_WORD128
	word128 a1=0, a2=0;
#else
	word64 ah1=0, al1=0, ah2=0, al2=0;
#endif
	// Load the polynomial key halves (kh = high word, kl = low word).
	word64 kh1, kl1, kh2, kl2;
	kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1];
	if (T_128BitTag)
	{
		kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1];
	}
#endif

	// Each outer iteration consumes up to one L1 chunk of the message.
	do
	{
		DeclareNH(nhA);
		DeclareNH(nhB);

		i = 0;
		// A short final chunk is handled by trimming the loop bounds: the
		// leftover (mod 8) words first, then full groups of eight.
		if (blocksRemainingInWord64 < L1KeyLengthInWord64)
		{
			if (blocksRemainingInWord64 % 8)
			{
				innerLoopEnd = blocksRemainingInWord64 % 8;
				for (; i<innerLoopEnd; i+=2)
					INNER_LOOP_ITERATION(0);
			}
			innerLoopEnd = blocksRemainingInWord64;
		}
		for (; i<innerLoopEnd; i+=8)
		{
			INNER_LOOP_ITERATION(0);
			INNER_LOOP_ITERATION(1);
			INNER_LOOP_ITERATION(2);
			INNER_LOOP_ITERATION(3);
		}
		blocksRemainingInWord64 -= innerLoopEnd;
		data += innerLoopEnd;

#if VMAC_BOOL_32BIT
		// Propagate the lazy carries of the three-word NH accumulators and
		// mask the top word to 62 bits.
		word32 nh0[2], nh1[2];
		word64 nh2[2];

		nh0[0] = word32(nhA0);
		nhA1 += (nhA0 >> 32);
		nh1[0] = word32(nhA1);
		nh2[0] = (nhA2 + (nhA1 >> 32)) & m62;

		if (T_128BitTag)
		{
			nh0[1] = word32(nhB0);
			nhB1 += (nhB0 >> 32);
			nh1[1] = word32(nhB1);
			nh2[1] = (nhB2 + (nhB1 >> 32)) & m62;
		}

// 32-bit views of the polynomial state (a*) and key (k*) words, with the
// native byte-order offset folded into the index.
#define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()])
#define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2
#define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()])
#define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum()))
#define aHi ((polyS+i*4)[0])
#define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()])
#define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum()))
#define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()])
#define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum()))
#define kHi ((polyS+i*4+2)[0])

		// On the very first chunk the polynomial step degenerates to adding
		// the key words to the NH output.
		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
				for (i=0; i<=(size_t)T_128BitTag; i++)
				{
					word64 t = (word64)nh0[i] + k0;
					a0 = (word32)t;
					t = (t >> 32) + nh1[i] + k1;
					a1 = (word32)t;
					aHi = (t >> 32) + nh2[i] + kHi;
				}
				continue;
			}
		}
		// One polynomial step per tag half: a = a*k + nh mod 2^127-1,
		// computed as a 32-bit schoolbook multiply (see the VMAC paper).
		for (i=0; i<=(size_t)T_128BitTag; i++)
		{
			word64 p, t;
			word32 t2;

			p = MUL32(a3, 2*k3);
			p += nh2[i];
			p += MUL32(a0, k2);
			p += MUL32(a1, k1);
			p += MUL32(a2, k0);
			t2 = (word32)p;
			p >>= 32;
			p += MUL32(a0, k3);
			p += MUL32(a1, k2);
			p += MUL32(a2, k1);
			p += MUL32(a3, k0);
			t = (word64(word32(p) & 0x7fffffff) << 32) | t2;
			p >>= 31;
			p += nh0[i];
			p += MUL32(a0, k0);
			p += MUL32(a1, 2*k3);
			p += MUL32(a2, 2*k2);
			p += MUL32(a3, 2*k1);
			t2 = (word32)p;
			p >>= 32;
			p += nh1[i];
			p += MUL32(a0, k1);
			p += MUL32(a1, k0);
			p += MUL32(a2, 2*k3);
			p += MUL32(a3, 2*k2);
			a0 = t2;
			a1 = (word32)p;
			aHi = (p >> 32) + t;
		}

#undef a0
#undef a1
#undef a2
#undef a3
#undef aHi
#undef k0
#undef k1
#undef k2
#undef k3
#undef kHi
#else // #if VMAC_BOOL_32BIT
		// First chunk: either seed the accumulator with key + NH output
		// (brand-new message) or reload the state saved by a previous call.
		if (isFirstBlock)
		{
			isFirstBlock = false;
			if (m_isFirstBlock)
			{
				m_isFirstBlock = false;
#if VMAC_BOOL_WORD128
#define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl)

				first_poly_step(a1, kh1, kl1, nhA);
				if (T_128BitTag)
					first_poly_step(a2, kh2, kl2, nhB);
#else
#define first_poly_step(ah, al, kh, kl, mh, ml) {\
	mh &= m62;\
	ADD128(mh, ml, kh, kl); \
	ah = mh; al = ml;}

				first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
				if (T_128BitTag)
					first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
#endif
				continue;
			}
			else
			{
#if VMAC_BOOL_WORD128
				a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1];
#else
				ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1];
#endif
				if (T_128BitTag)
				{
#if VMAC_BOOL_WORD128
					a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1];
#else
					ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1];
#endif
				}
			}
		}

// poly_step: a = a*k + (m & m126), reduced modulo 2^127-1.
#if VMAC_BOOL_WORD128
#define poly_step(a, kh, kl, m) \
	{ word128 t1, t2, t3, t4;\
	Multiply128(t2, a>>64, kl);\
	Multiply128(t3, a, kh);\
	Multiply128(t1, a, kl);\
	Multiply128(t4, a>>64, 2*kh);\
	t2 += t3;\
	t4 += t1;\
	t2 += t4>>64;\
	a = (word128(word64(t2)&m63) << 64) | word64(t4);\
	t2 *= 2;\
	a += m & m126;\
	a += t2>>64;}

		poly_step(a1, kh1, kl1, nhA);
		if (T_128BitTag)
			poly_step(a2, kh2, kl2, nhB);
#else
#define poly_step(ah, al, kh, kl, mh, ml) \
	{ word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
	\
	MUL64(t2h,t2l,ah,kl); \
	MUL64(t3h,t3l,al,kh); \
	MUL64(t1h,t1l,ah,2*kh); \
	MUL64(ah,al,al,kl); \
	\
	ADD128(t2h,t2l,t3h,t3l); \
	\
	ADD128(ah,al,t1h,t1l); \
	\
	\
	ADD128(t2h,ah,z,t2l); \
	\
	t2h += t2h + (ah >> 63); \
	ah &= m63; \
	\
	mh &= m62; \
	ADD128(ah,al,mh,ml); \
	ADD128(ah,al,z,t2h); \
	}

		poly_step(ah1, al1, kh1, kl1, nhA1, nhA0);
		if (T_128BitTag)
			poly_step(ah2, al2, kh2, kl2, nhB1, nhB0);
#endif
#endif // #if VMAC_BOOL_32BIT
	} while (blocksRemainingInWord64);

	// Store the polynomial accumulators back into m_polyState (the 32-bit
	// path above updates polyS in place through the a*/aHi aliases).
#if VMAC_BOOL_WORD128
	(polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1);
	if (T_128BitTag)
	{
		(polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2);
	}
#elif !VMAC_BOOL_32BIT
	(polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1;
	if (T_128BitTag)
	{
		(polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2;
	}
#endif
}
00784
// Dispatch one VHASH pass over the given message words, preferring the
// SSE2 assembly when it is compiled in and the CPU supports it.
inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64)
{
#if (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && (CRYPTOPP_BOOL_X86 || (CRYPTOPP_BOOL_X32 && !defined(CRYPTOPP_DISABLE_VMAC_ASM))))
	if (HasSSE2())
	{
		// The assembly routine processes one 64-bit tag half per call.
		VHASH_Update_SSE2(data, blocksRemainingInWord64, 0);
		if (m_is128)
			VHASH_Update_SSE2(data, blocksRemainingInWord64, 1);
		m_isFirstBlock = false;
	}
	else
#endif
	{
#if defined(_MSC_VER) && _MSC_VER < 1300
		// No bool template parameter support; the template reads m_is128.
		VHASH_Update_Template(data, blocksRemainingInWord64);
#else
		if (m_is128)
			VHASH_Update_Template<true>(data, blocksRemainingInWord64);
		else
			VHASH_Update_Template<false>(data, blocksRemainingInWord64);
#endif
	}
}
00808
00809 size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length)
00810 {
00811 size_t remaining = ModPowerOf2(length, m_L1KeyLength);
00812 VHASH_Update(data, (length-remaining)/8);
00813 return remaining;
00814 }
00815
// L3 hash: compresses the 128-bit polynomial accumulator (input[0] = high
// word, input[1] = low word) together with the message-length tweak into a
// single 64-bit value, working modulo p64 = 2^64 - 257. The step-by-step
// reductions follow the VHASH reference implementation.
static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len)
{
	word64 rh, rl, t, z=0;
	word64 p1 = input[0], p2 = input[1];
	word64 k1 = l3Key[0], k2 = l3Key[1];

	// Fully reduce (p1,p2)+(len,t) mod p127: fold the top bit into the low
	// word (2^127 == 1 mod p127) and add the length tweak to the high word.
	t = p1 >> 63;
	p1 &= m63;
	ADD128(p1, p2, len, t);
	// If the result still reaches p127, subtract p127 by adding 1 and
	// masking the high bit away again.
	t = (p1 > m63) + ((p1 == m63) & (p2 == m64));
	ADD128(p1, p2, z, t);
	p1 &= m63;

	// Compute (p1,p2) mod p64: fold the high word and the top half of the
	// low word down, with the extra carry corrections p64 requires.
	t = p1 + (p2 >> 32);
	t += (t >> 32);
	t += (word32)t > 0xfffffffeU;
	p1 += (t >> 32);
	p2 += (p1 << 32);

	// Compute (p1+k1) mod p64 and (p2+k2) mod p64; a wrap past 2^64 is
	// corrected by adding 257 (since 2^64 == 257 mod p64).
	p1 += k1;
	p1 += (0 - (p1 < k1)) & 257;
	p2 += k2;
	p2 += (0 - (p2 < k2)) & 257;

	// Compute (p1+k1)*(p2+k2) mod p64 from the 128-bit product: each high
	// chunk re-enters the sum multiplied by 257.
	MUL64(rh, rl, p1, p2);
	t = rh >> 56;
	ADD128(t, rl, z, rh);
	rh <<= 8;
	ADD128(t, rl, z, rh);
	t += t << 8;
	rl += t;
	rl += (0 - (rl < t)) & 257;
	rl += (0 - (rl > p64-1)) & 257;
	return rl;
}
00856
/// \brief Compute the final MAC tag.
/// \param mac receives the tag bytes
/// \param size number of tag bytes requested
void VMAC_Base::TruncatedFinal(byte *mac, size_t size)
{
	// Bytes of the message still buffered (less than one L1 chunk).
	size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength);

	if (len)
	{
		// Zero-pad the partial block to a 16-byte boundary and hash it.
		memset(m_data()+len, 0, (0-len)%16);
		VHASH_Update(DataBuf(), ((len+15)/16)*2);
		len *= 8; // convert to bits for the L3 length tweak
	}
	else if (m_isFirstBlock)
	{
		// Zero-length message: no polynomial step ever ran, so seed the
		// accumulator words directly from the key words stored alongside
		// them in m_polyState.
		m_polyState()[0] = m_polyState()[2];
		m_polyState()[1] = m_polyState()[3];
		if (m_is128)
		{
			m_polyState()[4] = m_polyState()[6];
			m_polyState()[5] = m_polyState()[7];
		}
	}

	if (m_is128)
	{
		// Two independent 64-bit halves, each whitened by adding the
		// corresponding half of the encrypted-nonce pad.
		word64 t[2];
		t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad());
		t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8);
		if (size == 16)
		{
			PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]);
			PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]);
		}
		else
		{
			// Truncated output: serialize big-endian, then copy the
			// requested number of leading bytes.
			t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]);
			t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]);
			memcpy(mac, t, size);
		}
	}
	else
	{
		word64 t = L3Hash(m_polyState(), m_l3Key(), len);
		// The low bit of the last nonce byte selects which half of the
		// 16-byte pad whitens the 64-bit tag.
		t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8);
		if (size == 8)
			PutWord(false, BIG_ENDIAN_ORDER, mac, t);
		else
		{
			t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t);
			memcpy(mac, &t, size);
		}
	}
}
00909
00910 NAMESPACE_END