00001
00002
00003
00004 #include "pch.h"
00005 #include "config.h"
00006
00007 #if CRYPTOPP_MSC_VERSION
00008 # pragma warning(disable: 4100)
00009 #endif
00010
00011 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
00012 # pragma GCC diagnostic ignored "-Wunused"
00013 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
00014 #endif
00015
00016 #ifndef CRYPTOPP_IMPORTS
00017
00018 #include "integer.h"
00019 #include "secblock.h"
00020 #include "modarith.h"
00021 #include "nbtheory.h"
00022 #include "smartptr.h"
00023 #include "algparam.h"
00024 #include "filters.h"
00025 #include "asn.h"
00026 #include "oids.h"
00027 #include "words.h"
00028 #include "pubkey.h"
00029 #include "sha.h"
00030 #include "cpu.h"
00031 #include "misc.h"
00032
00033 #include <iostream>
00034
00035 #if _MSC_VER >= 1400
00036 #include <intrin.h>
00037 #endif
00038
00039 #ifdef __DECCXX
00040 #include <c_asm.h>
00041 #endif
00042
00043 #ifdef CRYPTOPP_MSVC6_NO_PP
00044 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00045 #endif
00046
00047
00048
00049 #if CRYPTOPP_BOOL_X32 || defined(CRYPTOPP_DISABLE_INTEL_ASM)
00050 # undef CRYPTOPP_X86_ASM_AVAILABLE
00051 # undef CRYPTOPP_X32_ASM_AVAILABLE
00052 # undef CRYPTOPP_X64_ASM_AVAILABLE
00053 # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00054 # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
00055 # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
00056 # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
00057 #else
00058 # define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00059 #endif
00060
00061 NAMESPACE_BEGIN(CryptoPP)
00062
00063
00064 #if __ARMEL__ && (CRYPTOPP_GCC_VERSION >= 50200) && (CRYPTOPP_GCC_VERSION < 50300) && __OPTIMIZE__
00065 # define WORKAROUND_ARMEL_BUG 1
00066 #endif
00067
00068
00069 #if (__aarch64__ || __AARCH64EL__) && (CRYPTOPP_GCC_VERSION >= 50200) && (CRYPTOPP_GCC_VERSION < 50300)
00070 # define WORKAROUND_ARM64_BUG 1
00071 #endif
00072
00073 #if WORKAROUND_ARMEL_BUG
00074 # pragma GCC push_options
00075 # pragma GCC optimize("O1")
00076 #endif
00077
00078 #if WORKAROUND_ARM64_BUG
00079 # pragma GCC push_options
00080 # pragma GCC optimize("no-devirtualize")
00081 #endif
00082
00083 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00084 {
00085 if (valueType != typeid(Integer))
00086 return false;
00087 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00088 return true;
00089 }
00090
00091 inline static int Compare(const word *A, const word *B, size_t N)
00092 {
00093 while (N--)
00094 if (A[N] > B[N])
00095 return 1;
00096 else if (A[N] < B[N])
00097 return -1;
00098
00099 return 0;
00100 }
00101
00102 inline static int Increment(word *A, size_t N, word B=1)
00103 {
00104 assert(N);
00105 word t = A[0];
00106 A[0] = t+B;
00107 if (A[0] >= t)
00108 return 0;
00109 for (unsigned i=1; i<N; i++)
00110 if (++A[i])
00111 return 0;
00112 return 1;
00113 }
00114
00115 inline static int Decrement(word *A, size_t N, word B=1)
00116 {
00117 assert(N);
00118 word t = A[0];
00119 A[0] = t-B;
00120 if (A[0] <= t)
00121 return 0;
00122 for (unsigned i=1; i<N; i++)
00123 if (A[i]--)
00124 return 0;
00125 return 1;
00126 }
00127
00128 static void TwosComplement(word *A, size_t N)
00129 {
00130 Decrement(A, N);
00131 for (unsigned i=0; i<N; i++)
00132 A[i] = ~A[i];
00133 }
00134
00135 static word AtomicInverseModPower2(word A)
00136 {
00137 assert(A%2==1);
00138
00139 word R=A%8;
00140
00141 for (unsigned i=3; i<WORD_BITS; i*=2)
00142 R = R*(2-R*A);
00143
00144 assert(R*A==1);
00145 return R;
00146 }
00147
00148
00149
00150 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
00151 #define Declare2Words(x) word x##0, x##1;
00152 #define AssignWord(a, b) a##0 = b; a##1 = 0;
00153 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00154 #define LowWord(a) a##0
00155 #define HighWord(a) a##1
00156 #ifdef _MSC_VER
00157 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
00158 #ifndef __INTEL_COMPILER
00159 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00160 #endif
00161 #elif defined(__DECCXX)
00162 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
00163 #elif defined(__x86_64__)
00164 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
00165
00166 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
00167 #else
00168 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00169 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00170 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00171 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00172 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00173 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00174 #endif
00175 #endif
00176 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
00177 #ifndef Double3Words
00178 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00179 #endif
00180 #ifndef Acc2WordsBy2
00181 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00182 #endif
00183 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00184 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00185 #define GetCarry(u) u##1
00186 #define GetBorrow(u) u##1
00187 #else
00188 #define Declare2Words(x) dword x;
00189 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00190 #define MultiplyWords(p, a, b) p = __emulu(a, b);
00191 #else
00192 #define MultiplyWords(p, a, b) p = (dword)a*b;
00193 #endif
00194 #define AssignWord(a, b) a = b;
00195 #define Add2WordsBy1(a, b, c) a = b + c;
00196 #define Acc2WordsBy2(a, b) a += b;
00197 #define LowWord(a) word(a)
00198 #define HighWord(a) word(a>>WORD_BITS)
00199 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00200 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
00201 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
00202 #define GetCarry(u) HighWord(u)
00203 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
00204 #endif
00205 #ifndef MulAcc
00206 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00207 #endif
00208 #ifndef Acc2WordsBy1
00209 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
00210 #endif
00211 #ifndef Acc3WordsBy2
00212 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00213 #endif
00214
00215 class DWord
00216 {
00217 public:
00218
00219
00220 #if (defined(__COVERITY__) || !defined(NDEBUG)) && defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
00221
00222 DWord() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
00223 #elif (defined(__COVERITY__) || !defined(NDEBUG)) && !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
00224
00225 DWord() : m_halfs() {memset(&m_halfs, 0xa, sizeof(m_halfs));}
00226 #else
00227 DWord() {}
00228 #endif
00229
00230 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00231 explicit DWord(word low) : m_whole(low) {}
00232 #else
00233 explicit DWord(word low)
00234 {
00235 m_halfs.low = low;
00236 m_halfs.high = 0;
00237 }
00238 #endif
00239
00240 DWord(word low, word high)
00241 {
00242 m_halfs.low = low;
00243 m_halfs.high = high;
00244 }
00245
00246 static DWord Multiply(word a, word b)
00247 {
00248 DWord r;
00249 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00250 r.m_whole = (dword)a * b;
00251 #elif defined(MultiplyWordsLoHi)
00252 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
00253 #else
00254 assert(0);
00255 #endif
00256 return r;
00257 }
00258
00259 static DWord MultiplyAndAdd(word a, word b, word c)
00260 {
00261 DWord r = Multiply(a, b);
00262 return r += c;
00263 }
00264
00265 DWord & operator+=(word a)
00266 {
00267 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00268 m_whole = m_whole + a;
00269 #else
00270 m_halfs.low += a;
00271 m_halfs.high += (m_halfs.low < a);
00272 #endif
00273 return *this;
00274 }
00275
00276 DWord operator+(word a)
00277 {
00278 DWord r;
00279 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00280 r.m_whole = m_whole + a;
00281 #else
00282 r.m_halfs.low = m_halfs.low + a;
00283 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00284 #endif
00285 return r;
00286 }
00287
00288 DWord operator-(DWord a)
00289 {
00290 DWord r;
00291 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00292 r.m_whole = m_whole - a.m_whole;
00293 #else
00294 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00295 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00296 #endif
00297 return r;
00298 }
00299
00300 DWord operator-(word a)
00301 {
00302 DWord r;
00303 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00304 r.m_whole = m_whole - a;
00305 #else
00306 r.m_halfs.low = m_halfs.low - a;
00307 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00308 #endif
00309 return r;
00310 }
00311
00312
00313 word operator/(word divisor);
00314
00315 word operator%(word a);
00316
00317 bool operator!() const
00318 {
00319 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00320 return !m_whole;
00321 #else
00322 return !m_halfs.high && !m_halfs.low;
00323 #endif
00324 }
00325
00326 word GetLowHalf() const {return m_halfs.low;}
00327 word GetHighHalf() const {return m_halfs.high;}
00328 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00329
00330 private:
00331 union
00332 {
00333 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00334 dword m_whole;
00335 #endif
00336 struct
00337 {
00338 #ifdef IS_LITTLE_ENDIAN
00339 word low;
00340 word high;
00341 #else
00342 word high;
00343 word low;
00344 #endif
00345 } m_halfs;
00346 };
00347 };
00348
00349 class Word
00350 {
00351 public:
00352
00353
00354 #if defined(__COVERITY__)
00355 Word() : m_whole(0) {}
00356 #elif !defined(NDEBUG)
00357
00358 Word() : m_whole(0) {memset(&m_whole, 0xa, sizeof(m_whole));}
00359 #else
00360 Word() {}
00361 #endif
00362
00363 Word(word value) : m_whole(value) {}
00364 Word(hword low, hword high) : m_whole(low | (word(high) << (WORD_BITS/2))) {}
00365
00366 static Word Multiply(hword a, hword b)
00367 {
00368 Word r;
00369 r.m_whole = (word)a * b;
00370 return r;
00371 }
00372
00373 Word operator-(Word a)
00374 {
00375 Word r;
00376 r.m_whole = m_whole - a.m_whole;
00377 return r;
00378 }
00379
00380 Word operator-(hword a)
00381 {
00382 Word r;
00383 r.m_whole = m_whole - a;
00384 return r;
00385 }
00386
00387
00388 hword operator/(hword divisor)
00389 {
00390 return hword(m_whole / divisor);
00391 }
00392
00393 bool operator!() const
00394 {
00395 return !m_whole;
00396 }
00397
00398 word GetWhole() const {return m_whole;}
00399 hword GetLowHalf() const {return hword(m_whole);}
00400 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00401 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00402
00403 private:
00404 word m_whole;
00405 };
00406
00407
00408 template <class S, class D>
00409 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00410 {
00411 CRYPTOPP_UNUSED(dummy);
00412
00413
00414 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00415
00416
00417 S Q;
00418 if (S(B1+1) == 0)
00419 Q = A[2];
00420 else if (B1 > 0)
00421 Q = D(A[1], A[2]) / S(B1+1);
00422 else
00423 Q = D(A[0], A[1]) / B0;
00424
00425
00426 D p = D::Multiply(B0, Q);
00427 D u = (D) A[0] - p.GetLowHalf();
00428 A[0] = u.GetLowHalf();
00429 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00430 A[1] = u.GetLowHalf();
00431 A[2] += u.GetHighHalf();
00432
00433
00434 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00435 {
00436 u = (D) A[0] - B0;
00437 A[0] = u.GetLowHalf();
00438 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00439 A[1] = u.GetLowHalf();
00440 A[2] += u.GetHighHalf();
00441 Q++;
00442 assert(Q);
00443 }
00444
00445 return Q;
00446 }
00447
00448
00449 template <class S, class D>
00450 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00451 {
00452 if (!B)
00453 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00454 else
00455 {
00456 S Q[2];
00457 T[0] = Al.GetLowHalf();
00458 T[1] = Al.GetHighHalf();
00459 T[2] = Ah.GetLowHalf();
00460 T[3] = Ah.GetHighHalf();
00461 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00462 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00463 return D(Q[0], Q[1]);
00464 }
00465 }
00466
00467
00468 inline word DWord::operator/(word a)
00469 {
00470 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00471 return word(m_whole / a);
00472 #else
00473 hword r[4];
00474 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00475 #endif
00476 }
00477
00478 inline word DWord::operator%(word a)
00479 {
00480 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00481 return word(m_whole % a);
00482 #else
00483 if (a < (word(1) << (WORD_BITS/2)))
00484 {
00485 hword h = hword(a);
00486 word r = m_halfs.high % h;
00487 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00488 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00489 }
00490 else
00491 {
00492 hword r[4];
00493 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00494 return Word(r[0], r[1]).GetWhole();
00495 }
00496 #endif
00497 }
00498
00499
00500
00501
00502 #if defined(__GNUC__)
00503 #define AddPrologue \
00504 int result; \
00505 __asm__ __volatile__ \
00506 ( \
00507 INTEL_NOPREFIX
00508 #define AddEpilogue \
00509 ".att_syntax prefix;" \
00510 : "=a" (result)\
00511 : "d" (C), "a" (A), "D" (B), "c" (N) \
00512 : "%esi", "memory", "cc" \
00513 );\
00514 return result;
00515 #define MulPrologue \
00516 __asm__ __volatile__ \
00517 ( \
00518 ".intel_syntax noprefix;" \
00519 AS1( push ebx) \
00520 AS2( mov ebx, edx)
00521 #define MulEpilogue \
00522 AS1( pop ebx) \
00523 ".att_syntax prefix;" \
00524 : \
00525 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00526 : "%esi", "memory", "cc" \
00527 );
00528 #define SquPrologue MulPrologue
00529 #define SquEpilogue \
00530 AS1( pop ebx) \
00531 ".att_syntax prefix;" \
00532 : \
00533 : "d" (s_maskLow16), "c" (C), "a" (A) \
00534 : "%esi", "%edi", "memory", "cc" \
00535 );
00536 #define TopPrologue MulPrologue
00537 #define TopEpilogue \
00538 AS1( pop ebx) \
00539 ".att_syntax prefix;" \
00540 : \
00541 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00542 : "memory", "cc" \
00543 );
00544 #else
00545 #define AddPrologue \
00546 __asm push edi \
00547 __asm push esi \
00548 __asm mov eax, [esp+12] \
00549 __asm mov edi, [esp+16]
00550 #define AddEpilogue \
00551 __asm pop esi \
00552 __asm pop edi \
00553 __asm ret 8
00554 #if _MSC_VER < 1300
00555 #define SaveEBX __asm push ebx
00556 #define RestoreEBX __asm pop ebx
00557 #else
00558 #define SaveEBX
00559 #define RestoreEBX
00560 #endif
00561 #define SquPrologue \
00562 AS2( mov eax, A) \
00563 AS2( mov ecx, C) \
00564 SaveEBX \
00565 AS2( lea ebx, s_maskLow16)
00566 #define MulPrologue \
00567 AS2( mov eax, A) \
00568 AS2( mov edi, B) \
00569 AS2( mov ecx, C) \
00570 SaveEBX \
00571 AS2( lea ebx, s_maskLow16)
00572 #define TopPrologue \
00573 AS2( mov eax, A) \
00574 AS2( mov edi, B) \
00575 AS2( mov ecx, C) \
00576 AS2( mov esi, L) \
00577 SaveEBX \
00578 AS2( lea ebx, s_maskLow16)
00579 #define SquEpilogue RestoreEBX
00580 #define MulEpilogue RestoreEBX
00581 #define TopEpilogue RestoreEBX
00582 #endif
00583
00584 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00585 extern "C" {
00586 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00587 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00588 }
00589 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
00590 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00591 {
00592 word result;
00593 __asm__ __volatile__
00594 (
00595 INTEL_NOPREFIX
00596 AS1( neg %1)
00597 ASJ( jz, 1, f)
00598 AS2( mov %0,[%3+8*%1])
00599 AS2( add %0,[%4+8*%1])
00600 AS2( mov [%2+8*%1],%0)
00601 ASL(0)
00602 AS2( mov %0,[%3+8*%1+8])
00603 AS2( adc %0,[%4+8*%1+8])
00604 AS2( mov [%2+8*%1+8],%0)
00605 AS2( lea %1,[%1+2])
00606 ASJ( jrcxz, 1, f)
00607 AS2( mov %0,[%3+8*%1])
00608 AS2( adc %0,[%4+8*%1])
00609 AS2( mov [%2+8*%1],%0)
00610 ASJ( jmp, 0, b)
00611 ASL(1)
00612 AS2( mov %0, 0)
00613 AS2( adc %0, %0)
00614 ATT_NOPREFIX
00615 : "=&r" (result), "+c" (N)
00616 : "r" (C+N), "r" (A+N), "r" (B+N)
00617 : "memory", "cc"
00618 );
00619 return (int)result;
00620 }
00621
00622 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00623 {
00624 word result;
00625 __asm__ __volatile__
00626 (
00627 INTEL_NOPREFIX
00628 AS1( neg %1)
00629 ASJ( jz, 1, f)
00630 AS2( mov %0,[%3+8*%1])
00631 AS2( sub %0,[%4+8*%1])
00632 AS2( mov [%2+8*%1],%0)
00633 ASL(0)
00634 AS2( mov %0,[%3+8*%1+8])
00635 AS2( sbb %0,[%4+8*%1+8])
00636 AS2( mov [%2+8*%1+8],%0)
00637 AS2( lea %1,[%1+2])
00638 ASJ( jrcxz, 1, f)
00639 AS2( mov %0,[%3+8*%1])
00640 AS2( sbb %0,[%4+8*%1])
00641 AS2( mov [%2+8*%1],%0)
00642 ASJ( jmp, 0, b)
00643 ASL(1)
00644 AS2( mov %0, 0)
00645 AS2( adc %0, %0)
00646 ATT_NOPREFIX
00647 : "=&r" (result), "+c" (N)
00648 : "r" (C+N), "r" (A+N), "r" (B+N)
00649 : "memory", "cc"
00650 );
00651 return (int)result;
00652 }
00653 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00654 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00655 {
00656 AddPrologue
00657
00658
00659 AS2( lea eax, [eax+4*ecx])
00660 AS2( lea edi, [edi+4*ecx])
00661 AS2( lea edx, [edx+4*ecx])
00662
00663 AS1( neg ecx)
00664 AS2( test ecx, 2)
00665 ASJ( jz, 0, f)
00666 AS2( sub ecx, 2)
00667 ASJ( jmp, 1, f)
00668
00669 ASL(0)
00670 ASJ( jecxz, 2, f)
00671 AS2( mov esi,[eax+4*ecx])
00672 AS2( adc esi,[edi+4*ecx])
00673 AS2( mov [edx+4*ecx],esi)
00674 AS2( mov esi,[eax+4*ecx+4])
00675 AS2( adc esi,[edi+4*ecx+4])
00676 AS2( mov [edx+4*ecx+4],esi)
00677 ASL(1)
00678 AS2( mov esi,[eax+4*ecx+8])
00679 AS2( adc esi,[edi+4*ecx+8])
00680 AS2( mov [edx+4*ecx+8],esi)
00681 AS2( mov esi,[eax+4*ecx+12])
00682 AS2( adc esi,[edi+4*ecx+12])
00683 AS2( mov [edx+4*ecx+12],esi)
00684
00685 AS2( lea ecx,[ecx+4])
00686 ASJ( jmp, 0, b)
00687
00688 ASL(2)
00689 AS2( mov eax, 0)
00690 AS1( setc al)
00691
00692 AddEpilogue
00693 }
00694
00695 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00696 {
00697 AddPrologue
00698
00699
00700 AS2( lea eax, [eax+4*ecx])
00701 AS2( lea edi, [edi+4*ecx])
00702 AS2( lea edx, [edx+4*ecx])
00703
00704 AS1( neg ecx)
00705 AS2( test ecx, 2)
00706 ASJ( jz, 0, f)
00707 AS2( sub ecx, 2)
00708 ASJ( jmp, 1, f)
00709
00710 ASL(0)
00711 ASJ( jecxz, 2, f)
00712 AS2( mov esi,[eax+4*ecx])
00713 AS2( sbb esi,[edi+4*ecx])
00714 AS2( mov [edx+4*ecx],esi)
00715 AS2( mov esi,[eax+4*ecx+4])
00716 AS2( sbb esi,[edi+4*ecx+4])
00717 AS2( mov [edx+4*ecx+4],esi)
00718 ASL(1)
00719 AS2( mov esi,[eax+4*ecx+8])
00720 AS2( sbb esi,[edi+4*ecx+8])
00721 AS2( mov [edx+4*ecx+8],esi)
00722 AS2( mov esi,[eax+4*ecx+12])
00723 AS2( sbb esi,[edi+4*ecx+12])
00724 AS2( mov [edx+4*ecx+12],esi)
00725
00726 AS2( lea ecx,[ecx+4])
00727 ASJ( jmp, 0, b)
00728
00729 ASL(2)
00730 AS2( mov eax, 0)
00731 AS1( setc al)
00732
00733 AddEpilogue
00734 }
00735
00736 #if CRYPTOPP_INTEGER_SSE2
00737 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00738 {
00739 AddPrologue
00740
00741
00742 AS2( lea eax, [eax+4*ecx])
00743 AS2( lea edi, [edi+4*ecx])
00744 AS2( lea edx, [edx+4*ecx])
00745
00746 AS1( neg ecx)
00747 AS2( pxor mm2, mm2)
00748 ASJ( jz, 2, f)
00749 AS2( test ecx, 2)
00750 ASJ( jz, 0, f)
00751 AS2( sub ecx, 2)
00752 ASJ( jmp, 1, f)
00753
00754 ASL(0)
00755 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00756 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00757 AS2( paddq mm0, mm1)
00758 AS2( paddq mm2, mm0)
00759 AS2( movd DWORD PTR [edx+4*ecx], mm2)
00760 AS2( psrlq mm2, 32)
00761
00762 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
00763 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00764 AS2( paddq mm0, mm1)
00765 AS2( paddq mm2, mm0)
00766 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00767 AS2( psrlq mm2, 32)
00768
00769 ASL(1)
00770 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00771 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00772 AS2( paddq mm0, mm1)
00773 AS2( paddq mm2, mm0)
00774 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
00775 AS2( psrlq mm2, 32)
00776
00777 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
00778 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00779 AS2( paddq mm0, mm1)
00780 AS2( paddq mm2, mm0)
00781 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00782 AS2( psrlq mm2, 32)
00783
00784 AS2( add ecx, 4)
00785 ASJ( jnz, 0, b)
00786
00787 ASL(2)
00788 AS2( movd eax, mm2)
00789 AS1( emms)
00790
00791 AddEpilogue
00792 }
00793 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00794 {
00795 AddPrologue
00796
00797
00798 AS2( lea eax, [eax+4*ecx])
00799 AS2( lea edi, [edi+4*ecx])
00800 AS2( lea edx, [edx+4*ecx])
00801
00802 AS1( neg ecx)
00803 AS2( pxor mm2, mm2)
00804 ASJ( jz, 2, f)
00805 AS2( test ecx, 2)
00806 ASJ( jz, 0, f)
00807 AS2( sub ecx, 2)
00808 ASJ( jmp, 1, f)
00809
00810 ASL(0)
00811 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00812 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00813 AS2( psubq mm0, mm1)
00814 AS2( psubq mm0, mm2)
00815 AS2( movd DWORD PTR [edx+4*ecx], mm0)
00816 AS2( psrlq mm0, 63)
00817
00818 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
00819 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00820 AS2( psubq mm2, mm1)
00821 AS2( psubq mm2, mm0)
00822 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00823 AS2( psrlq mm2, 63)
00824
00825 ASL(1)
00826 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00827 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00828 AS2( psubq mm0, mm1)
00829 AS2( psubq mm0, mm2)
00830 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
00831 AS2( psrlq mm0, 63)
00832
00833 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
00834 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00835 AS2( psubq mm2, mm1)
00836 AS2( psubq mm2, mm0)
00837 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00838 AS2( psrlq mm2, 63)
00839
00840 AS2( add ecx, 4)
00841 ASJ( jnz, 0, b)
00842
00843 ASL(2)
00844 AS2( movd eax, mm2)
00845 AS1( emms)
00846
00847 AddEpilogue
00848 }
00849 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00850 #else
00851 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00852 {
00853 assert (N%2 == 0);
00854
00855 Declare2Words(u);
00856 AssignWord(u, 0);
00857 for (size_t i=0; i<N; i+=2)
00858 {
00859 AddWithCarry(u, A[i], B[i]);
00860 C[i] = LowWord(u);
00861 AddWithCarry(u, A[i+1], B[i+1]);
00862 C[i+1] = LowWord(u);
00863 }
00864 return int(GetCarry(u));
00865 }
00866
00867 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00868 {
00869 assert (N%2 == 0);
00870
00871 Declare2Words(u);
00872 AssignWord(u, 0);
00873 for (size_t i=0; i<N; i+=2)
00874 {
00875 SubtractWithBorrow(u, A[i], B[i]);
00876 C[i] = LowWord(u);
00877 SubtractWithBorrow(u, A[i+1], B[i+1]);
00878 C[i+1] = LowWord(u);
00879 }
00880 return int(GetBorrow(u));
00881 }
00882 #endif
00883
00884 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00885 {
00886 word carry=0;
00887 for(unsigned i=0; i<N; i++)
00888 {
00889 Declare2Words(p);
00890 MultiplyWords(p, A[i], B);
00891 Acc2WordsBy1(p, carry);
00892 C[i] = LowWord(p);
00893 carry = HighWord(p);
00894 }
00895 return carry;
00896 }
00897
00898 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00899
00900 #define Mul_2 \
00901 Mul_Begin(2) \
00902 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00903 Mul_End(1, 1)
00904
00905 #define Mul_4 \
00906 Mul_Begin(4) \
00907 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00908 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00909 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00910 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
00911 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00912 Mul_End(5, 3)
00913
00914 #define Mul_8 \
00915 Mul_Begin(8) \
00916 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00917 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00918 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00919 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00920 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00921 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00922 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00923 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00924 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00925 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00926 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00927 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00928 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00929 Mul_End(13, 7)
00930
00931 #define Mul_16 \
00932 Mul_Begin(16) \
00933 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00934 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00935 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00936 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00937 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00938 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00939 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00940 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00941 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00942 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00943 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00944 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00945 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00946 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00947 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00948 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00949 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00950 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00951 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00952 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00953 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00954 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00955 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00956 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00957 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00958 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00959 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00960 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00961 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00962 Mul_End(29, 15)
00963
00964 #define Squ_2 \
00965 Squ_Begin(2) \
00966 Squ_End(2)
00967
00968 #define Squ_4 \
00969 Squ_Begin(4) \
00970 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00971 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00972 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00973 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00974 Squ_End(4)
00975
00976 #define Squ_8 \
00977 Squ_Begin(8) \
00978 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00979 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00980 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00981 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00982 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00983 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00984 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00985 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00986 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00987 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00988 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00989 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00990 Squ_End(8)
00991
00992 #define Squ_16 \
00993 Squ_Begin(16) \
00994 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00995 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00996 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00997 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00998 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00999 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
01000 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
01001 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
01002 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
01003 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
01004 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
01005 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
01006 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
01007 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
01008 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
01009 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
01010 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
01011 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
01012 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
01013 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
01014 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
01015 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
01016 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
01017 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
01018 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
01019 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
01020 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
01021 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
01022 Squ_End(16)
01023
01024 #define Bot_2 \
01025 Mul_Begin(2) \
01026 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
01027 Bot_End(2)
01028
01029 #define Bot_4 \
01030 Mul_Begin(4) \
01031 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
01032 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
01033 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
01034 Bot_End(4)
01035
01036 #define Bot_8 \
01037 Mul_Begin(8) \
01038 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
01039 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
01040 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01041 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
01042 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
01043 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
01044 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
01045 Bot_End(8)
01046
01047 #define Bot_16 \
01048 Mul_Begin(16) \
01049 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
01050 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
01051 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01052 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
01053 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
01054 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
01055 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01056 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
01057 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
01058 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
01059 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
01060 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
01061 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
01062 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
01063 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
01064 Bot_End(16)
01065
01066 #endif
01067
01068 #if 0
01069 #define Mul_Begin(n) \
01070 Declare2Words(p) \
01071 Declare2Words(c) \
01072 Declare2Words(d) \
01073 MultiplyWords(p, A[0], B[0]) \
01074 AssignWord(c, LowWord(p)) \
01075 AssignWord(d, HighWord(p))
01076
01077 #define Mul_Acc(i, j) \
01078 MultiplyWords(p, A[i], B[j]) \
01079 Acc2WordsBy1(c, LowWord(p)) \
01080 Acc2WordsBy1(d, HighWord(p))
01081
01082 #define Mul_SaveAcc(k, i, j) \
01083 R[k] = LowWord(c); \
01084 Add2WordsBy1(c, d, HighWord(c)) \
01085 MultiplyWords(p, A[i], B[j]) \
01086 AssignWord(d, HighWord(p)) \
01087 Acc2WordsBy1(c, LowWord(p))
01088
01089 #define Mul_End(n) \
01090 R[2*n-3] = LowWord(c); \
01091 Acc2WordsBy1(d, HighWord(c)) \
01092 MultiplyWords(p, A[n-1], B[n-1])\
01093 Acc2WordsBy2(d, p) \
01094 R[2*n-2] = LowWord(d); \
01095 R[2*n-1] = HighWord(d);
01096
01097 #define Bot_SaveAcc(k, i, j) \
01098 R[k] = LowWord(c); \
01099 word e = LowWord(d) + HighWord(c); \
01100 e += A[i] * B[j];
01101
01102 #define Bot_Acc(i, j) \
01103 e += A[i] * B[j];
01104
01105 #define Bot_End(n) \
01106 R[n-1] = e;
01107 #else
01108 #define Mul_Begin(n) \
01109 Declare2Words(p) \
01110 word c; \
01111 Declare2Words(d) \
01112 MultiplyWords(p, A[0], B[0]) \
01113 c = LowWord(p); \
01114 AssignWord(d, HighWord(p))
01115
01116 #define Mul_Acc(i, j) \
01117 MulAcc(c, d, A[i], B[j])
01118
01119 #define Mul_SaveAcc(k, i, j) \
01120 R[k] = c; \
01121 c = LowWord(d); \
01122 AssignWord(d, HighWord(d)) \
01123 MulAcc(c, d, A[i], B[j])
01124
01125 #define Mul_End(k, i) \
01126 R[k] = c; \
01127 MultiplyWords(p, A[i], B[i]) \
01128 Acc2WordsBy2(p, d) \
01129 R[k+1] = LowWord(p); \
01130 R[k+2] = HighWord(p);
01131
01132 #define Bot_SaveAcc(k, i, j) \
01133 R[k] = c; \
01134 c = LowWord(d); \
01135 c += A[i] * B[j];
01136
01137 #define Bot_Acc(i, j) \
01138 c += A[i] * B[j];
01139
01140 #define Bot_End(n) \
01141 R[n-1] = c;
01142 #endif
01143
01144 #define Squ_Begin(n) \
01145 Declare2Words(p) \
01146 word c; \
01147 Declare2Words(d) \
01148 Declare2Words(e) \
01149 MultiplyWords(p, A[0], A[0]) \
01150 R[0] = LowWord(p); \
01151 AssignWord(e, HighWord(p)) \
01152 MultiplyWords(p, A[0], A[1]) \
01153 c = LowWord(p); \
01154 AssignWord(d, HighWord(p)) \
01155 Squ_NonDiag \
01156
01157 #define Squ_NonDiag \
01158 Double3Words(c, d)
01159
01160 #define Squ_SaveAcc(k, i, j) \
01161 Acc3WordsBy2(c, d, e) \
01162 R[k] = c; \
01163 MultiplyWords(p, A[i], A[j]) \
01164 c = LowWord(p); \
01165 AssignWord(d, HighWord(p)) \
01166
01167 #define Squ_Acc(i, j) \
01168 MulAcc(c, d, A[i], A[j])
01169
01170 #define Squ_Diag(i) \
01171 Squ_NonDiag \
01172 MulAcc(c, d, A[i], A[i])
01173
01174 #define Squ_End(n) \
01175 Acc3WordsBy2(c, d, e) \
01176 R[2*n-3] = c; \
01177 MultiplyWords(p, A[n-1], A[n-1])\
01178 Acc2WordsBy2(p, e) \
01179 R[2*n-2] = LowWord(p); \
01180 R[2*n-1] = HighWord(p);
01181
01182
01183 void Baseline_Multiply2(word *R, const word *A, const word *B)
01184 {
01185 Mul_2
01186 }
01187
01188 void Baseline_Multiply4(word *R, const word *A, const word *B)
01189 {
01190 Mul_4
01191 }
01192
01193 void Baseline_Multiply8(word *R, const word *A, const word *B)
01194 {
01195 Mul_8
01196 }
01197
01198 void Baseline_Square2(word *R, const word *A)
01199 {
01200 Squ_2
01201 }
01202
01203 void Baseline_Square4(word *R, const word *A)
01204 {
01205 Squ_4
01206 }
01207
01208 void Baseline_Square8(word *R, const word *A)
01209 {
01210 Squ_8
01211 }
01212
01213 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01214 {
01215 Bot_2
01216 }
01217
01218 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01219 {
01220 Bot_4
01221 }
01222
01223 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01224 {
01225 Bot_8
01226 }
01227
01228 #define Top_Begin(n) \
01229 Declare2Words(p) \
01230 word c; \
01231 Declare2Words(d) \
01232 MultiplyWords(p, A[0], B[n-2]);\
01233 AssignWord(d, HighWord(p));
01234
01235 #define Top_Acc(i, j) \
01236 MultiplyWords(p, A[i], B[j]);\
01237 Acc2WordsBy1(d, HighWord(p));
01238
01239 #define Top_SaveAcc0(i, j) \
01240 c = LowWord(d); \
01241 AssignWord(d, HighWord(d)) \
01242 MulAcc(c, d, A[i], B[j])
01243
01244 #define Top_SaveAcc1(i, j) \
01245 c = L<c; \
01246 Acc2WordsBy1(d, c); \
01247 c = LowWord(d); \
01248 AssignWord(d, HighWord(d)) \
01249 MulAcc(c, d, A[i], B[j])
01250
01251 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01252 {
01253 CRYPTOPP_UNUSED(L);
01254 word T[4];
01255 Baseline_Multiply2(T, A, B);
01256 R[0] = T[2];
01257 R[1] = T[3];
01258 }
01259
01260 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01261 {
01262 Top_Begin(4)
01263 Top_Acc(1, 1) Top_Acc(2, 0) \
01264 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01265 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
01266 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01267 Mul_End(1, 3)
01268 }
01269
01270 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01271 {
01272 Top_Begin(8)
01273 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01274 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01275 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01276 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01277 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01278 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01279 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01280 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01281 Mul_End(5, 7)
01282 }
01283
01284 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
01285 void Baseline_Multiply16(word *R, const word *A, const word *B)
01286 {
01287 Mul_16
01288 }
01289
01290 void Baseline_Square16(word *R, const word *A)
01291 {
01292 Squ_16
01293 }
01294
01295 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01296 {
01297 Bot_16
01298 }
01299
01300 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01301 {
01302 Top_Begin(16)
01303 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01304 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01305 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01306 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01307 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01308 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01309 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01310 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01311 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01312 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01313 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01314 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01315 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01316 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01317 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01318 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01319 Mul_End(13, 15)
01320 }
01321 #endif
01322
01323
01324
01325 #if CRYPTOPP_INTEGER_SSE2
01326
01327 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01328
01329 #undef Mul_Begin
01330 #undef Mul_Acc
01331 #undef Top_Begin
01332 #undef Top_Acc
01333 #undef Squ_Acc
01334 #undef Squ_NonDiag
01335 #undef Squ_Diag
01336 #undef Squ_SaveAcc
01337 #undef Squ_Begin
01338 #undef Mul_SaveAcc
01339 #undef Bot_Acc
01340 #undef Bot_SaveAcc
01341 #undef Bot_End
01342 #undef Squ_End
01343 #undef Mul_End
01344
01345 #define SSE2_FinalSave(k) \
01346 AS2( psllq xmm5, 16) \
01347 AS2( paddq xmm4, xmm5) \
01348 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
01349
01350 #define SSE2_SaveShift(k) \
01351 AS2( movq xmm0, xmm6) \
01352 AS2( punpckhqdq xmm6, xmm0) \
01353 AS2( movq xmm1, xmm7) \
01354 AS2( punpckhqdq xmm7, xmm1) \
01355 AS2( paddd xmm6, xmm0) \
01356 AS2( pslldq xmm6, 4) \
01357 AS2( paddd xmm7, xmm1) \
01358 AS2( paddd xmm4, xmm6) \
01359 AS2( pslldq xmm7, 4) \
01360 AS2( movq xmm6, xmm4) \
01361 AS2( paddd xmm5, xmm7) \
01362 AS2( movq xmm7, xmm5) \
01363 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01364 AS2( psrlq xmm6, 16) \
01365 AS2( paddq xmm6, xmm7) \
01366 AS2( punpckhqdq xmm4, xmm0) \
01367 AS2( punpckhqdq xmm5, xmm0) \
01368 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
01369 AS2( psrlq xmm6, 3*16) \
01370 AS2( paddd xmm4, xmm6) \
01371
01372 #define Squ_SSE2_SaveShift(k) \
01373 AS2( movq xmm0, xmm6) \
01374 AS2( punpckhqdq xmm6, xmm0) \
01375 AS2( movq xmm1, xmm7) \
01376 AS2( punpckhqdq xmm7, xmm1) \
01377 AS2( paddd xmm6, xmm0) \
01378 AS2( pslldq xmm6, 4) \
01379 AS2( paddd xmm7, xmm1) \
01380 AS2( paddd xmm4, xmm6) \
01381 AS2( pslldq xmm7, 4) \
01382 AS2( movhlps xmm6, xmm4) \
01383 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01384 AS2( paddd xmm5, xmm7) \
01385 AS2( movhps QWORD PTR [esp+12], xmm5)\
01386 AS2( psrlq xmm4, 16) \
01387 AS2( paddq xmm4, xmm5) \
01388 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
01389 AS2( psrlq xmm4, 3*16) \
01390 AS2( paddd xmm4, xmm6) \
01391 AS2( movq QWORD PTR [esp+4], xmm4)\
01392
01393 #define SSE2_FirstMultiply(i) \
01394 AS2( movdqa xmm7, [esi+(i)*16])\
01395 AS2( movdqa xmm5, [edi-(i)*16])\
01396 AS2( pmuludq xmm5, xmm7) \
01397 AS2( movdqa xmm4, [ebx])\
01398 AS2( movdqa xmm6, xmm4) \
01399 AS2( pand xmm4, xmm5) \
01400 AS2( psrld xmm5, 16) \
01401 AS2( pmuludq xmm7, [edx-(i)*16])\
01402 AS2( pand xmm6, xmm7) \
01403 AS2( psrld xmm7, 16)
01404
01405 #define Squ_Begin(n) \
01406 SquPrologue \
01407 AS2( mov esi, esp)\
01408 AS2( and esp, 0xfffffff0)\
01409 AS2( lea edi, [esp-32*n])\
01410 AS2( sub esp, 32*n+16)\
01411 AS1( push esi)\
01412 AS2( mov esi, edi) \
01413 AS2( xor edx, edx) \
01414 ASL(1) \
01415 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01416 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01417 AS2( movdqa [edi+2*edx], xmm0) \
01418 AS2( psrlq xmm0, 32) \
01419 AS2( movdqa [edi+2*edx+16], xmm0) \
01420 AS2( movdqa [edi+16*n+2*edx], xmm1) \
01421 AS2( psrlq xmm1, 32) \
01422 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
01423 AS2( add edx, 16) \
01424 AS2( cmp edx, 8*(n)) \
01425 ASJ( jne, 1, b) \
01426 AS2( lea edx, [edi+16*n])\
01427 SSE2_FirstMultiply(0) \
01428
01429 #define Squ_Acc(i) \
01430 ASL(LSqu##i) \
01431 AS2( movdqa xmm1, [esi+(i)*16]) \
01432 AS2( movdqa xmm0, [edi-(i)*16]) \
01433 AS2( movdqa xmm2, [ebx]) \
01434 AS2( pmuludq xmm0, xmm1) \
01435 AS2( pmuludq xmm1, [edx-(i)*16]) \
01436 AS2( movdqa xmm3, xmm2) \
01437 AS2( pand xmm2, xmm0) \
01438 AS2( psrld xmm0, 16) \
01439 AS2( paddd xmm4, xmm2) \
01440 AS2( paddd xmm5, xmm0) \
01441 AS2( pand xmm3, xmm1) \
01442 AS2( psrld xmm1, 16) \
01443 AS2( paddd xmm6, xmm3) \
01444 AS2( paddd xmm7, xmm1) \
01445
01446 #define Squ_Acc1(i)
01447 #define Squ_Acc2(i) ASC(call, LSqu##i)
01448 #define Squ_Acc3(i) Squ_Acc2(i)
01449 #define Squ_Acc4(i) Squ_Acc2(i)
01450 #define Squ_Acc5(i) Squ_Acc2(i)
01451 #define Squ_Acc6(i) Squ_Acc2(i)
01452 #define Squ_Acc7(i) Squ_Acc2(i)
01453 #define Squ_Acc8(i) Squ_Acc2(i)
01454
01455 #define SSE2_End(E, n) \
01456 SSE2_SaveShift(2*(n)-3) \
01457 AS2( movdqa xmm7, [esi+16]) \
01458 AS2( movdqa xmm0, [edi]) \
01459 AS2( pmuludq xmm0, xmm7) \
01460 AS2( movdqa xmm2, [ebx]) \
01461 AS2( pmuludq xmm7, [edx]) \
01462 AS2( movdqa xmm6, xmm2) \
01463 AS2( pand xmm2, xmm0) \
01464 AS2( psrld xmm0, 16) \
01465 AS2( paddd xmm4, xmm2) \
01466 AS2( paddd xmm5, xmm0) \
01467 AS2( pand xmm6, xmm7) \
01468 AS2( psrld xmm7, 16) \
01469 SSE2_SaveShift(2*(n)-2) \
01470 SSE2_FinalSave(2*(n)-1) \
01471 AS1( pop esp)\
01472 E
01473
01474 #define Squ_End(n) SSE2_End(SquEpilogue, n)
01475 #define Mul_End(n) SSE2_End(MulEpilogue, n)
01476 #define Top_End(n) SSE2_End(TopEpilogue, n)
01477
01478 #define Squ_Column1(k, i) \
01479 Squ_SSE2_SaveShift(k) \
01480 AS2( add esi, 16) \
01481 SSE2_FirstMultiply(1)\
01482 Squ_Acc##i(i) \
01483 AS2( paddd xmm4, xmm4) \
01484 AS2( paddd xmm5, xmm5) \
01485 AS2( movdqa xmm3, [esi]) \
01486 AS2( movq xmm1, QWORD PTR [esi+8]) \
01487 AS2( pmuludq xmm1, xmm3) \
01488 AS2( pmuludq xmm3, xmm3) \
01489 AS2( movdqa xmm0, [ebx])\
01490 AS2( movdqa xmm2, xmm0) \
01491 AS2( pand xmm0, xmm1) \
01492 AS2( psrld xmm1, 16) \
01493 AS2( paddd xmm6, xmm0) \
01494 AS2( paddd xmm7, xmm1) \
01495 AS2( pand xmm2, xmm3) \
01496 AS2( psrld xmm3, 16) \
01497 AS2( paddd xmm6, xmm6) \
01498 AS2( paddd xmm7, xmm7) \
01499 AS2( paddd xmm4, xmm2) \
01500 AS2( paddd xmm5, xmm3) \
01501 AS2( movq xmm0, QWORD PTR [esp+4])\
01502 AS2( movq xmm1, QWORD PTR [esp+12])\
01503 AS2( paddd xmm4, xmm0)\
01504 AS2( paddd xmm5, xmm1)\
01505
01506 #define Squ_Column0(k, i) \
01507 Squ_SSE2_SaveShift(k) \
01508 AS2( add edi, 16) \
01509 AS2( add edx, 16) \
01510 SSE2_FirstMultiply(1)\
01511 Squ_Acc##i(i) \
01512 AS2( paddd xmm6, xmm6) \
01513 AS2( paddd xmm7, xmm7) \
01514 AS2( paddd xmm4, xmm4) \
01515 AS2( paddd xmm5, xmm5) \
01516 AS2( movq xmm0, QWORD PTR [esp+4])\
01517 AS2( movq xmm1, QWORD PTR [esp+12])\
01518 AS2( paddd xmm4, xmm0)\
01519 AS2( paddd xmm5, xmm1)\
01520
01521 #define SSE2_MulAdd45 \
01522 AS2( movdqa xmm7, [esi]) \
01523 AS2( movdqa xmm0, [edi]) \
01524 AS2( pmuludq xmm0, xmm7) \
01525 AS2( movdqa xmm2, [ebx]) \
01526 AS2( pmuludq xmm7, [edx]) \
01527 AS2( movdqa xmm6, xmm2) \
01528 AS2( pand xmm2, xmm0) \
01529 AS2( psrld xmm0, 16) \
01530 AS2( paddd xmm4, xmm2) \
01531 AS2( paddd xmm5, xmm0) \
01532 AS2( pand xmm6, xmm7) \
01533 AS2( psrld xmm7, 16)
01534
01535 #define Mul_Begin(n) \
01536 MulPrologue \
01537 AS2( mov esi, esp)\
01538 AS2( and esp, 0xfffffff0)\
01539 AS2( sub esp, 48*n+16)\
01540 AS1( push esi)\
01541 AS2( xor edx, edx) \
01542 ASL(1) \
01543 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01544 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01545 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01546 AS2( movdqa [esp+20+2*edx], xmm0) \
01547 AS2( psrlq xmm0, 32) \
01548 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01549 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01550 AS2( psrlq xmm1, 32) \
01551 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01552 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01553 AS2( psrlq xmm2, 32) \
01554 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01555 AS2( add edx, 16) \
01556 AS2( cmp edx, 8*(n)) \
01557 ASJ( jne, 1, b) \
01558 AS2( lea edi, [esp+20])\
01559 AS2( lea edx, [esp+20+16*n])\
01560 AS2( lea esi, [esp+20+32*n])\
01561 SSE2_FirstMultiply(0) \
01562
01563 #define Mul_Acc(i) \
01564 ASL(LMul##i) \
01565 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01566 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01567 AS2( movdqa xmm2, [ebx]) \
01568 AS2( pmuludq xmm0, xmm1) \
01569 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01570 AS2( movdqa xmm3, xmm2) \
01571 AS2( pand xmm2, xmm0) \
01572 AS2( psrld xmm0, 16) \
01573 AS2( paddd xmm4, xmm2) \
01574 AS2( paddd xmm5, xmm0) \
01575 AS2( pand xmm3, xmm1) \
01576 AS2( psrld xmm1, 16) \
01577 AS2( paddd xmm6, xmm3) \
01578 AS2( paddd xmm7, xmm1) \
01579
01580 #define Mul_Acc1(i)
01581 #define Mul_Acc2(i) ASC(call, LMul##i)
01582 #define Mul_Acc3(i) Mul_Acc2(i)
01583 #define Mul_Acc4(i) Mul_Acc2(i)
01584 #define Mul_Acc5(i) Mul_Acc2(i)
01585 #define Mul_Acc6(i) Mul_Acc2(i)
01586 #define Mul_Acc7(i) Mul_Acc2(i)
01587 #define Mul_Acc8(i) Mul_Acc2(i)
01588 #define Mul_Acc9(i) Mul_Acc2(i)
01589 #define Mul_Acc10(i) Mul_Acc2(i)
01590 #define Mul_Acc11(i) Mul_Acc2(i)
01591 #define Mul_Acc12(i) Mul_Acc2(i)
01592 #define Mul_Acc13(i) Mul_Acc2(i)
01593 #define Mul_Acc14(i) Mul_Acc2(i)
01594 #define Mul_Acc15(i) Mul_Acc2(i)
01595 #define Mul_Acc16(i) Mul_Acc2(i)
01596
01597 #define Mul_Column1(k, i) \
01598 SSE2_SaveShift(k) \
01599 AS2( add esi, 16) \
01600 SSE2_MulAdd45\
01601 Mul_Acc##i(i) \
01602
01603 #define Mul_Column0(k, i) \
01604 SSE2_SaveShift(k) \
01605 AS2( add edi, 16) \
01606 AS2( add edx, 16) \
01607 SSE2_MulAdd45\
01608 Mul_Acc##i(i) \
01609
01610 #define Bot_Acc(i) \
01611 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01612 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01613 AS2( pmuludq xmm0, xmm1) \
01614 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01615 AS2( paddq xmm4, xmm0) \
01616 AS2( paddd xmm6, xmm1)
01617
01618 #define Bot_SaveAcc(k) \
01619 SSE2_SaveShift(k) \
01620 AS2( add edi, 16) \
01621 AS2( add edx, 16) \
01622 AS2( movdqa xmm6, [esi]) \
01623 AS2( movdqa xmm0, [edi]) \
01624 AS2( pmuludq xmm0, xmm6) \
01625 AS2( paddq xmm4, xmm0) \
01626 AS2( psllq xmm5, 16) \
01627 AS2( paddq xmm4, xmm5) \
01628 AS2( pmuludq xmm6, [edx])
01629
01630 #define Bot_End(n) \
01631 AS2( movhlps xmm7, xmm6) \
01632 AS2( paddd xmm6, xmm7) \
01633 AS2( psllq xmm6, 32) \
01634 AS2( paddd xmm4, xmm6) \
01635 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
01636 AS1( pop esp)\
01637 MulEpilogue
01638
01639 #define Top_Begin(n) \
01640 TopPrologue \
01641 AS2( mov edx, esp)\
01642 AS2( and esp, 0xfffffff0)\
01643 AS2( sub esp, 48*n+16)\
01644 AS1( push edx)\
01645 AS2( xor edx, edx) \
01646 ASL(1) \
01647 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01648 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01649 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01650 AS2( movdqa [esp+20+2*edx], xmm0) \
01651 AS2( psrlq xmm0, 32) \
01652 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01653 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01654 AS2( psrlq xmm1, 32) \
01655 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01656 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01657 AS2( psrlq xmm2, 32) \
01658 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01659 AS2( add edx, 16) \
01660 AS2( cmp edx, 8*(n)) \
01661 ASJ( jne, 1, b) \
01662 AS2( mov eax, esi) \
01663 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
01664 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
01665 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
01666 AS2( pxor xmm4, xmm4)\
01667 AS2( pxor xmm5, xmm5)
01668
01669 #define Top_Acc(i) \
01670 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
01671 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01672 AS2( psrlq xmm0, 48) \
01673 AS2( paddd xmm5, xmm0)\
01674
01675 #define Top_Column0(i) \
01676 AS2( psllq xmm5, 32) \
01677 AS2( add edi, 16) \
01678 AS2( add edx, 16) \
01679 SSE2_MulAdd45\
01680 Mul_Acc##i(i) \
01681
01682 #define Top_Column1(i) \
01683 SSE2_SaveShift(0) \
01684 AS2( add esi, 16) \
01685 SSE2_MulAdd45\
01686 Mul_Acc##i(i) \
01687 AS2( shr eax, 16) \
01688 AS2( movd xmm0, eax)\
01689 AS2( movd xmm1, [ecx+4])\
01690 AS2( psrld xmm1, 16)\
01691 AS2( pcmpgtd xmm1, xmm0)\
01692 AS2( psrld xmm1, 31)\
01693 AS2( paddd xmm4, xmm1)\
01694
01695 void SSE2_Square4(word *C, const word *A)
01696 {
01697 Squ_Begin(2)
01698 Squ_Column0(0, 1)
01699 Squ_End(2)
01700 }
01701
01702 void SSE2_Square8(word *C, const word *A)
01703 {
01704 Squ_Begin(4)
01705 #ifndef __GNUC__
01706 ASJ( jmp, 0, f)
01707 Squ_Acc(2)
01708 AS1( ret) ASL(0)
01709 #endif
01710 Squ_Column0(0, 1)
01711 Squ_Column1(1, 1)
01712 Squ_Column0(2, 2)
01713 Squ_Column1(3, 1)
01714 Squ_Column0(4, 1)
01715 Squ_End(4)
01716 }
01717
01718 void SSE2_Square16(word *C, const word *A)
01719 {
01720 Squ_Begin(8)
01721 #ifndef __GNUC__
01722 ASJ( jmp, 0, f)
01723 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01724 AS1( ret) ASL(0)
01725 #endif
01726 Squ_Column0(0, 1)
01727 Squ_Column1(1, 1)
01728 Squ_Column0(2, 2)
01729 Squ_Column1(3, 2)
01730 Squ_Column0(4, 3)
01731 Squ_Column1(5, 3)
01732 Squ_Column0(6, 4)
01733 Squ_Column1(7, 3)
01734 Squ_Column0(8, 3)
01735 Squ_Column1(9, 2)
01736 Squ_Column0(10, 2)
01737 Squ_Column1(11, 1)
01738 Squ_Column0(12, 1)
01739 Squ_End(8)
01740 }
01741
01742 void SSE2_Square32(word *C, const word *A)
01743 {
01744 Squ_Begin(16)
01745 ASJ( jmp, 0, f)
01746 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01747 AS1( ret) ASL(0)
01748 Squ_Column0(0, 1)
01749 Squ_Column1(1, 1)
01750 Squ_Column0(2, 2)
01751 Squ_Column1(3, 2)
01752 Squ_Column0(4, 3)
01753 Squ_Column1(5, 3)
01754 Squ_Column0(6, 4)
01755 Squ_Column1(7, 4)
01756 Squ_Column0(8, 5)
01757 Squ_Column1(9, 5)
01758 Squ_Column0(10, 6)
01759 Squ_Column1(11, 6)
01760 Squ_Column0(12, 7)
01761 Squ_Column1(13, 7)
01762 Squ_Column0(14, 8)
01763 Squ_Column1(15, 7)
01764 Squ_Column0(16, 7)
01765 Squ_Column1(17, 6)
01766 Squ_Column0(18, 6)
01767 Squ_Column1(19, 5)
01768 Squ_Column0(20, 5)
01769 Squ_Column1(21, 4)
01770 Squ_Column0(22, 4)
01771 Squ_Column1(23, 3)
01772 Squ_Column0(24, 3)
01773 Squ_Column1(25, 2)
01774 Squ_Column0(26, 2)
01775 Squ_Column1(27, 1)
01776 Squ_Column0(28, 1)
01777 Squ_End(16)
01778 }
01779
01780 void SSE2_Multiply4(word *C, const word *A, const word *B)
01781 {
01782 Mul_Begin(2)
01783 #ifndef __GNUC__
01784 ASJ( jmp, 0, f)
01785 Mul_Acc(2)
01786 AS1( ret) ASL(0)
01787 #endif
01788 Mul_Column0(0, 2)
01789 Mul_End(2)
01790 }
01791
01792 void SSE2_Multiply8(word *C, const word *A, const word *B)
01793 {
01794 Mul_Begin(4)
01795 #ifndef __GNUC__
01796 ASJ( jmp, 0, f)
01797 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01798 AS1( ret) ASL(0)
01799 #endif
01800 Mul_Column0(0, 2)
01801 Mul_Column1(1, 3)
01802 Mul_Column0(2, 4)
01803 Mul_Column1(3, 3)
01804 Mul_Column0(4, 2)
01805 Mul_End(4)
01806 }
01807
01808 void SSE2_Multiply16(word *C, const word *A, const word *B)
01809 {
01810 Mul_Begin(8)
01811 #ifndef __GNUC__
01812 ASJ( jmp, 0, f)
01813 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01814 AS1( ret) ASL(0)
01815 #endif
01816 Mul_Column0(0, 2)
01817 Mul_Column1(1, 3)
01818 Mul_Column0(2, 4)
01819 Mul_Column1(3, 5)
01820 Mul_Column0(4, 6)
01821 Mul_Column1(5, 7)
01822 Mul_Column0(6, 8)
01823 Mul_Column1(7, 7)
01824 Mul_Column0(8, 6)
01825 Mul_Column1(9, 5)
01826 Mul_Column0(10, 4)
01827 Mul_Column1(11, 3)
01828 Mul_Column0(12, 2)
01829 Mul_End(8)
01830 }
01831
01832 void SSE2_Multiply32(word *C, const word *A, const word *B)
01833 {
01834 Mul_Begin(16)
01835 ASJ( jmp, 0, f)
01836 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01837 AS1( ret) ASL(0)
01838 Mul_Column0(0, 2)
01839 Mul_Column1(1, 3)
01840 Mul_Column0(2, 4)
01841 Mul_Column1(3, 5)
01842 Mul_Column0(4, 6)
01843 Mul_Column1(5, 7)
01844 Mul_Column0(6, 8)
01845 Mul_Column1(7, 9)
01846 Mul_Column0(8, 10)
01847 Mul_Column1(9, 11)
01848 Mul_Column0(10, 12)
01849 Mul_Column1(11, 13)
01850 Mul_Column0(12, 14)
01851 Mul_Column1(13, 15)
01852 Mul_Column0(14, 16)
01853 Mul_Column1(15, 15)
01854 Mul_Column0(16, 14)
01855 Mul_Column1(17, 13)
01856 Mul_Column0(18, 12)
01857 Mul_Column1(19, 11)
01858 Mul_Column0(20, 10)
01859 Mul_Column1(21, 9)
01860 Mul_Column0(22, 8)
01861 Mul_Column1(23, 7)
01862 Mul_Column0(24, 6)
01863 Mul_Column1(25, 5)
01864 Mul_Column0(26, 4)
01865 Mul_Column1(27, 3)
01866 Mul_Column0(28, 2)
01867 Mul_End(16)
01868 }
01869
01870 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01871 {
01872 Mul_Begin(2)
01873 Bot_SaveAcc(0) Bot_Acc(2)
01874 Bot_End(2)
01875 }
01876
01877 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01878 {
01879 Mul_Begin(4)
01880 #ifndef __GNUC__
01881 ASJ( jmp, 0, f)
01882 Mul_Acc(3) Mul_Acc(2)
01883 AS1( ret) ASL(0)
01884 #endif
01885 Mul_Column0(0, 2)
01886 Mul_Column1(1, 3)
01887 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01888 Bot_End(4)
01889 }
01890
01891 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01892 {
01893 Mul_Begin(8)
01894 #ifndef __GNUC__
01895 ASJ( jmp, 0, f)
01896 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01897 AS1( ret) ASL(0)
01898 #endif
01899 Mul_Column0(0, 2)
01900 Mul_Column1(1, 3)
01901 Mul_Column0(2, 4)
01902 Mul_Column1(3, 5)
01903 Mul_Column0(4, 6)
01904 Mul_Column1(5, 7)
01905 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01906 Bot_End(8)
01907 }
01908
01909 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01910 {
01911 Mul_Begin(16)
01912 #ifndef __GNUC__
01913 ASJ( jmp, 0, f)
01914 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01915 AS1( ret) ASL(0)
01916 #endif
01917 Mul_Column0(0, 2)
01918 Mul_Column1(1, 3)
01919 Mul_Column0(2, 4)
01920 Mul_Column1(3, 5)
01921 Mul_Column0(4, 6)
01922 Mul_Column1(5, 7)
01923 Mul_Column0(6, 8)
01924 Mul_Column1(7, 9)
01925 Mul_Column0(8, 10)
01926 Mul_Column1(9, 11)
01927 Mul_Column0(10, 12)
01928 Mul_Column1(11, 13)
01929 Mul_Column0(12, 14)
01930 Mul_Column1(13, 15)
01931 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01932 Bot_End(16)
01933 }
01934
01935 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01936 {
01937 Top_Begin(4)
01938 Top_Acc(3) Top_Acc(2) Top_Acc(1)
01939 #ifndef __GNUC__
01940 ASJ( jmp, 0, f)
01941 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01942 AS1( ret) ASL(0)
01943 #endif
01944 Top_Column0(4)
01945 Top_Column1(3)
01946 Mul_Column0(0, 2)
01947 Top_End(2)
01948 }
01949
01950 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01951 {
01952 Top_Begin(8)
01953 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01954 #ifndef __GNUC__
01955 ASJ( jmp, 0, f)
01956 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01957 AS1( ret) ASL(0)
01958 #endif
01959 Top_Column0(8)
01960 Top_Column1(7)
01961 Mul_Column0(0, 6)
01962 Mul_Column1(1, 5)
01963 Mul_Column0(2, 4)
01964 Mul_Column1(3, 3)
01965 Mul_Column0(4, 2)
01966 Top_End(4)
01967 }
01968
01969 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01970 {
01971 Top_Begin(16)
01972 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01973 #ifndef __GNUC__
01974 ASJ( jmp, 0, f)
01975 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01976 AS1( ret) ASL(0)
01977 #endif
01978 Top_Column0(16)
01979 Top_Column1(15)
01980 Mul_Column0(0, 14)
01981 Mul_Column1(1, 13)
01982 Mul_Column0(2, 12)
01983 Mul_Column1(3, 11)
01984 Mul_Column0(4, 10)
01985 Mul_Column1(5, 9)
01986 Mul_Column0(6, 8)
01987 Mul_Column1(7, 7)
01988 Mul_Column0(8, 6)
01989 Mul_Column1(9, 5)
01990 Mul_Column0(10, 4)
01991 Mul_Column1(11, 3)
01992 Mul_Column0(12, 2)
01993 Top_End(8)
01994 }
01995
01996 #endif // #if CRYPTOPP_INTEGER_SSE2
01997
01998
01999
02000 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
02001 typedef void (* PMul)(word *C, const word *A, const word *B);
02002 typedef void (* PSqu)(word *C, const word *A);
02003 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
02004
02005 #if CRYPTOPP_INTEGER_SSE2
02006 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
02007 static size_t s_recursionLimit = 8;
02008 #else
02009 static const size_t s_recursionLimit = 16;
02010 #endif
02011
02012 static PMul s_pMul[9], s_pBot[9];
02013 static PSqu s_pSqu[9];
02014 static PMulTop s_pTop[9];
02015
02016 static void SetFunctionPointers()
02017 {
02018 s_pMul[0] = &Baseline_Multiply2;
02019 s_pBot[0] = &Baseline_MultiplyBottom2;
02020 s_pSqu[0] = &Baseline_Square2;
02021 s_pTop[0] = &Baseline_MultiplyTop2;
02022 s_pTop[1] = &Baseline_MultiplyTop4;
02023
02024 #if CRYPTOPP_INTEGER_SSE2
02025 if (HasSSE2())
02026 {
02027 #if _MSC_VER != 1200 || defined(NDEBUG)
02028 if (IsP4())
02029 {
02030 s_pAdd = &SSE2_Add;
02031 s_pSub = &SSE2_Sub;
02032 }
02033 #endif
02034
02035 s_recursionLimit = 32;
02036
02037 s_pMul[1] = &SSE2_Multiply4;
02038 s_pMul[2] = &SSE2_Multiply8;
02039 s_pMul[4] = &SSE2_Multiply16;
02040 s_pMul[8] = &SSE2_Multiply32;
02041
02042 s_pBot[1] = &SSE2_MultiplyBottom4;
02043 s_pBot[2] = &SSE2_MultiplyBottom8;
02044 s_pBot[4] = &SSE2_MultiplyBottom16;
02045 s_pBot[8] = &SSE2_MultiplyBottom32;
02046
02047 s_pSqu[1] = &SSE2_Square4;
02048 s_pSqu[2] = &SSE2_Square8;
02049 s_pSqu[4] = &SSE2_Square16;
02050 s_pSqu[8] = &SSE2_Square32;
02051
02052 s_pTop[2] = &SSE2_MultiplyTop8;
02053 s_pTop[4] = &SSE2_MultiplyTop16;
02054 s_pTop[8] = &SSE2_MultiplyTop32;
02055 }
02056 else
02057 #endif
02058 {
02059 s_pMul[1] = &Baseline_Multiply4;
02060 s_pMul[2] = &Baseline_Multiply8;
02061
02062 s_pBot[1] = &Baseline_MultiplyBottom4;
02063 s_pBot[2] = &Baseline_MultiplyBottom8;
02064
02065 s_pSqu[1] = &Baseline_Square4;
02066 s_pSqu[2] = &Baseline_Square8;
02067
02068 s_pTop[2] = &Baseline_MultiplyTop8;
02069
02070 #if !CRYPTOPP_INTEGER_SSE2
02071 s_pMul[4] = &Baseline_Multiply16;
02072 s_pBot[4] = &Baseline_MultiplyBottom16;
02073 s_pSqu[4] = &Baseline_Square16;
02074 s_pTop[4] = &Baseline_MultiplyTop16;
02075 #endif
02076 }
02077 }
02078
02079 inline int Add(word *C, const word *A, const word *B, size_t N)
02080 {
02081 #if CRYPTOPP_INTEGER_SSE2
02082 return s_pAdd(N, C, A, B);
02083 #else
02084 return Baseline_Add(N, C, A, B);
02085 #endif
02086 }
02087
02088 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02089 {
02090 #if CRYPTOPP_INTEGER_SSE2
02091 return s_pSub(N, C, A, B);
02092 #else
02093 return Baseline_Sub(N, C, A, B);
02094 #endif
02095 }
02096
02097
02098
02099
02100 #define A0 A
02101 #define A1 (A+N2)
02102 #define B0 B
02103 #define B1 (B+N2)
02104
02105 #define T0 T
02106 #define T1 (T+N2)
02107 #define T2 (T+N)
02108 #define T3 (T+N+N2)
02109
02110 #define R0 R
02111 #define R1 (R+N2)
02112 #define R2 (R+N)
02113 #define R3 (R+N+N2)
02114
02115
02116
02117
02118
02119
02120 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02121 {
02122 assert(N>=2 && N%2==0);
02123
02124 if (N <= s_recursionLimit)
02125 s_pMul[N/4](R, A, B);
02126 else
02127 {
02128 const size_t N2 = N/2;
02129
02130 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02131 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02132
02133 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02134 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02135
02136 RecursiveMultiply(R2, T2, A1, B1, N2);
02137 RecursiveMultiply(T0, T2, R0, R1, N2);
02138 RecursiveMultiply(R0, T2, A0, B0, N2);
02139
02140
02141
02142 int c2 = Add(R2, R2, R1, N2);
02143 int c3 = c2;
02144 c2 += Add(R1, R2, R0, N2);
02145 c3 += Add(R2, R2, R3, N2);
02146
02147 if (AN2 == BN2)
02148 c3 -= Subtract(R1, R1, T0, N);
02149 else
02150 c3 += Add(R1, R1, T0, N);
02151
02152 c3 += Increment(R2, N2, c2);
02153 assert (c3 >= 0 && c3 <= 2);
02154 Increment(R3, N2, c3);
02155 }
02156 }
02157
02158
02159
02160
02161
02162 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02163 {
02164 assert(N && N%2==0);
02165
02166 if (N <= s_recursionLimit)
02167 s_pSqu[N/4](R, A);
02168 else
02169 {
02170 const size_t N2 = N/2;
02171
02172 RecursiveSquare(R0, T2, A0, N2);
02173 RecursiveSquare(R2, T2, A1, N2);
02174 RecursiveMultiply(T0, T2, A0, A1, N2);
02175
02176 int carry = Add(R1, R1, T0, N);
02177 carry += Add(R1, R1, T0, N);
02178 Increment(R3, N2, carry);
02179 }
02180 }
02181
02182
02183
02184
02185
02186
02187 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02188 {
02189 assert(N>=2 && N%2==0);
02190
02191 if (N <= s_recursionLimit)
02192 s_pBot[N/4](R, A, B);
02193 else
02194 {
02195 const size_t N2 = N/2;
02196
02197 RecursiveMultiply(R, T, A0, B0, N2);
02198 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02199 Add(R1, R1, T0, N2);
02200 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02201 Add(R1, R1, T0, N2);
02202 }
02203 }
02204
02205
02206
02207
02208
02209
02210
02211 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02212 {
02213 assert(N>=2 && N%2==0);
02214
02215 if (N <= s_recursionLimit)
02216 s_pTop[N/4](R, A, B, L[N-1]);
02217 else
02218 {
02219 const size_t N2 = N/2;
02220
02221 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02222 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02223
02224 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02225 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02226
02227 RecursiveMultiply(T0, T2, R0, R1, N2);
02228 RecursiveMultiply(R0, T2, A1, B1, N2);
02229
02230
02231
02232 int t, c3;
02233 int c2 = Subtract(T2, L+N2, L, N2);
02234
02235 if (AN2 == BN2)
02236 {
02237 c2 -= Add(T2, T2, T0, N2);
02238 t = (Compare(T2, R0, N2) == -1);
02239 c3 = t - Subtract(T2, T2, T1, N2);
02240 }
02241 else
02242 {
02243 c2 += Subtract(T2, T2, T0, N2);
02244 t = (Compare(T2, R0, N2) == -1);
02245 c3 = t + Add(T2, T2, T1, N2);
02246 }
02247
02248 c2 += t;
02249 if (c2 >= 0)
02250 c3 += Increment(T2, N2, c2);
02251 else
02252 c3 -= Decrement(T2, N2, -c2);
02253 c3 += Add(R0, T2, R1, N2);
02254
02255 assert (c3 >= 0 && c3 <= 2);
02256 Increment(R1, N2, c3);
02257 }
02258 }
02259
02260 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02261 {
02262 RecursiveMultiply(R, T, A, B, N);
02263 }
02264
02265 inline void Square(word *R, word *T, const word *A, size_t N)
02266 {
02267 RecursiveSquare(R, T, A, N);
02268 }
02269
02270 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02271 {
02272 RecursiveMultiplyBottom(R, T, A, B, N);
02273 }
02274
02275
02276
02277
02278
02279
02280 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02281 {
02282 if (NA == NB)
02283 {
02284 if (A == B)
02285 Square(R, T, A, NA);
02286 else
02287 Multiply(R, T, A, B, NA);
02288
02289 return;
02290 }
02291
02292 if (NA > NB)
02293 {
02294 std::swap(A, B);
02295 std::swap(NA, NB);
02296 }
02297
02298 assert(NB % NA == 0);
02299
02300 if (NA==2 && !A[1])
02301 {
02302 switch (A[0])
02303 {
02304 case 0:
02305 SetWords(R, 0, NB+2);
02306 return;
02307 case 1:
02308 CopyWords(R, B, NB);
02309 R[NB] = R[NB+1] = 0;
02310 return;
02311 default:
02312 R[NB] = LinearMultiply(R, B, A[0], NB);
02313 R[NB+1] = 0;
02314 return;
02315 }
02316 }
02317
02318 size_t i;
02319 if ((NB/NA)%2 == 0)
02320 {
02321 Multiply(R, T, A, B, NA);
02322 CopyWords(T+2*NA, R+NA, NA);
02323
02324 for (i=2*NA; i<NB; i+=2*NA)
02325 Multiply(T+NA+i, T, A, B+i, NA);
02326 for (i=NA; i<NB; i+=2*NA)
02327 Multiply(R+i, T, A, B+i, NA);
02328 }
02329 else
02330 {
02331 for (i=0; i<NB; i+=2*NA)
02332 Multiply(R+i, T, A, B+i, NA);
02333 for (i=NA; i<NB; i+=2*NA)
02334 Multiply(T+NA+i, T, A, B+i, NA);
02335 }
02336
02337 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02338 Increment(R+NB, NA);
02339 }
02340
02341
02342
02343
02344
02345 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02346 {
02347 if (N==2)
02348 {
02349 T[0] = AtomicInverseModPower2(A[0]);
02350 T[1] = 0;
02351 s_pBot[0](T+2, T, A);
02352 TwosComplement(T+2, 2);
02353 Increment(T+2, 2, 2);
02354 s_pBot[0](R, T, T+2);
02355 }
02356 else
02357 {
02358 const size_t N2 = N/2;
02359 RecursiveInverseModPower2(R0, T0, A0, N2);
02360 T0[0] = 1;
02361 SetWords(T0+1, 0, N2-1);
02362 MultiplyTop(R1, T1, T0, R0, A0, N2);
02363 MultiplyBottom(T0, T1, R0, A1, N2);
02364 Add(T0, R1, T0, N2);
02365 TwosComplement(T0, N2);
02366 MultiplyBottom(R1, T1, R0, T0, N2);
02367 }
02368 }
02369
02370
02371
02372
02373
02374
02375
02376 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02377 {
02378 #if 1
02379 MultiplyBottom(R, T, X, U, N);
02380 MultiplyTop(T, T+N, X, R, M, N);
02381 word borrow = Subtract(T, X+N, T, N);
02382
02383 word carry = Add(T+N, T, M, N);
02384 assert(carry | !borrow);
02385 CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
02386 CopyWords(R, T + ((0-borrow) & N), N);
02387 #elif 0
02388 const word u = 0-U[0];
02389 Declare2Words(p)
02390 for (size_t i=0; i<N; i++)
02391 {
02392 const word t = u * X[i];
02393 word c = 0;
02394 for (size_t j=0; j<N; j+=2)
02395 {
02396 MultiplyWords(p, t, M[j]);
02397 Acc2WordsBy1(p, X[i+j]);
02398 Acc2WordsBy1(p, c);
02399 X[i+j] = LowWord(p);
02400 c = HighWord(p);
02401 MultiplyWords(p, t, M[j+1]);
02402 Acc2WordsBy1(p, X[i+j+1]);
02403 Acc2WordsBy1(p, c);
02404 X[i+j+1] = LowWord(p);
02405 c = HighWord(p);
02406 }
02407
02408 if (Increment(X+N+i, N-i, c))
02409 while (!Subtract(X+N, X+N, M, N)) {}
02410 }
02411
02412 memcpy(R, X+N, N*WORD_SIZE);
02413 #else
02414 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02415 for (size_t i=0; i<N; i++)
02416 {
02417 __m64 t = _mm_cvtsi32_si64(X[i]);
02418 t = _mm_mul_su32(t, u);
02419 __m64 c = _mm_setzero_si64();
02420 for (size_t j=0; j<N; j+=2)
02421 {
02422 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02423 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02424 c = _mm_add_si64(c, p);
02425 X[i+j] = _mm_cvtsi64_si32(c);
02426 c = _mm_srli_si64(c, 32);
02427 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02428 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02429 c = _mm_add_si64(c, p);
02430 X[i+j+1] = _mm_cvtsi64_si32(c);
02431 c = _mm_srli_si64(c, 32);
02432 }
02433
02434 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02435 while (!Subtract(X+N, X+N, M, N)) {}
02436 }
02437
02438 memcpy(R, X+N, N*WORD_SIZE);
02439 _mm_empty();
02440 #endif
02441 }
02442
02443
02444
02445
02446
02447
02448
02449
02450 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02451 {
02452 assert(N%2==0 && N>=4);
02453
02454 #define M0 M
02455 #define M1 (M+N2)
02456 #define V0 V
02457 #define V1 (V+N2)
02458
02459 #define X0 X
02460 #define X1 (X+N2)
02461 #define X2 (X+N)
02462 #define X3 (X+N+N2)
02463
02464 const size_t N2 = N/2;
02465 Multiply(T0, T2, V0, X3, N2);
02466 int c2 = Add(T0, T0, X0, N);
02467 MultiplyBottom(T3, T2, T0, U, N2);
02468 MultiplyTop(T2, R, T0, T3, M0, N2);
02469 c2 -= Subtract(T2, T1, T2, N2);
02470 Multiply(T0, R, T3, M1, N2);
02471 c2 -= Subtract(T0, T2, T0, N2);
02472 int c3 = -(int)Subtract(T1, X2, T1, N2);
02473 Multiply(R0, T2, V1, X3, N2);
02474 c3 += Add(R, R, T, N);
02475
02476 if (c2>0)
02477 c3 += Increment(R1, N2);
02478 else if (c2<0)
02479 c3 -= Decrement(R1, N2, -c2);
02480
02481 assert(c3>=-1 && c3<=1);
02482 if (c3>0)
02483 Subtract(R, R, M, N);
02484 else if (c3<0)
02485 Add(R, R, M, N);
02486
02487 #undef M0
02488 #undef M1
02489 #undef V0
02490 #undef V1
02491
02492 #undef X0
02493 #undef X1
02494 #undef X2
02495 #undef X3
02496 }
02497
02498 #undef A0
02499 #undef A1
02500 #undef B0
02501 #undef B1
02502
02503 #undef T0
02504 #undef T1
02505 #undef T2
02506 #undef T3
02507
02508 #undef R0
02509 #undef R1
02510 #undef R2
02511 #undef R3
02512
02513
02514
02515
02516
02517
02518
02519
02520
02521
02522
02523
02524
02525
02526
02527
02528
02529
02530
02531
02532
02533
02534
02535
02536
02537
02538
02539
02540
02541
02542
02543
02544
02545
02546
02547
02548
02549
02550
02551
02552
02553
02554
02555
02556
02557
02558
02559
02560
02561
02562
02563
02564
02565
02566
02567
02568
02569
02570
02571
02572
02573
02574
02575
02576
02577 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02578 {
02579 word T[4];
02580 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02581 Q[0] = q.GetLowHalf();
02582 Q[1] = q.GetHighHalf();
02583
02584 #ifndef NDEBUG
02585 if (B[0] || B[1])
02586 {
02587
02588 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02589 word P[4];
02590 s_pMul[0](P, Q, B);
02591 Add(P, P, T, 4);
02592 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02593 }
02594 #endif
02595 }
02596
02597
02598 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02599 {
02600 assert(N && N%2==0);
02601
02602 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02603
02604 word borrow = Subtract(R, R, T, N+2);
02605 assert(!borrow && !R[N+1]);
02606 CRYPTOPP_UNUSED(borrow);
02607
02608 while (R[N] || Compare(R, B, N) >= 0)
02609 {
02610 R[N] -= Subtract(R, R, B, N);
02611 Q[1] += (++Q[0]==0);
02612 assert(Q[0] || Q[1]);
02613 }
02614 }
02615
02616
02617
02618
02619
02620
02621
02622 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02623 {
02624 assert(NA && NB && NA%2==0 && NB%2==0);
02625 assert(B[NB-1] || B[NB-2]);
02626 assert(NB <= NA);
02627
02628
02629 word *const TA=T;
02630 word *const TB=T+NA+2;
02631 word *const TP=T+NA+2+NB;
02632
02633
02634 unsigned shiftWords = (B[NB-1]==0);
02635 TB[0] = TB[NB-1] = 0;
02636 CopyWords(TB+shiftWords, B, NB-shiftWords);
02637 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02638 assert(shiftBits < WORD_BITS);
02639 ShiftWordsLeftByBits(TB, NB, shiftBits);
02640
02641
02642 TA[0] = TA[NA] = TA[NA+1] = 0;
02643 CopyWords(TA+shiftWords, A, NA);
02644 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02645
02646 if (TA[NA+1]==0 && TA[NA] <= 1)
02647 {
02648 Q[NA-NB+1] = Q[NA-NB] = 0;
02649 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02650 {
02651 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02652 ++Q[NA-NB];
02653 }
02654 }
02655 else
02656 {
02657 NA+=2;
02658 assert(Compare(TA+NA-NB, TB, NB) < 0);
02659 }
02660
02661 word BT[2];
02662 BT[0] = TB[NB-2] + 1;
02663 BT[1] = TB[NB-1] + (BT[0]==0);
02664
02665
02666 for (size_t i=NA-2; i>=NB; i-=2)
02667 {
02668 AtomicDivide(Q+i-NB, TA+i-2, BT);
02669 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02670 }
02671
02672
02673 CopyWords(R, TA+shiftWords, NB);
02674 ShiftWordsRightByBits(R, NB, shiftBits);
02675 }
02676
02677 static inline size_t EvenWordCount(const word *X, size_t N)
02678 {
02679 while (N && X[N-2]==0 && X[N-1]==0)
02680 N-=2;
02681 return N;
02682 }
02683
02684
02685
02686
02687
02688
02689
02690 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02691 {
02692 assert(NA<=N && N && N%2==0);
02693
02694 word *b = T;
02695 word *c = T+N;
02696 word *f = T+2*N;
02697 word *g = T+3*N;
02698 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02699 unsigned int k=0;
02700 bool s=false;
02701
02702 SetWords(T, 0, 3*N);
02703 b[0]=1;
02704 CopyWords(f, A, NA);
02705 CopyWords(g, M, N);
02706
02707 while (1)
02708 {
02709 word t=f[0];
02710 while (!t)
02711 {
02712 if (EvenWordCount(f, fgLen)==0)
02713 {
02714 SetWords(R, 0, N);
02715 return 0;
02716 }
02717
02718 ShiftWordsRightByWords(f, fgLen, 1);
02719 bcLen += 2 * (c[bcLen-1] != 0);
02720 assert(bcLen <= N);
02721 ShiftWordsLeftByWords(c, bcLen, 1);
02722 k+=WORD_BITS;
02723 t=f[0];
02724 }
02725
02726
02727 unsigned int i = TrailingZeros(t);
02728 t >>= i;
02729 k += i;
02730
02731 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
02732 {
02733 if (s)
02734 Subtract(R, M, b, N);
02735 else
02736 CopyWords(R, b, N);
02737 return k;
02738 }
02739
02740 ShiftWordsRightByBits(f, fgLen, i);
02741 t = ShiftWordsLeftByBits(c, bcLen, i);
02742 c[bcLen] += t;
02743 bcLen += 2 * (t!=0);
02744 assert(bcLen <= N);
02745
02746 bool swap = Compare(f, g, fgLen)==-1;
02747 ConditionalSwapPointers(swap, f, g);
02748 ConditionalSwapPointers(swap, b, c);
02749 s ^= swap;
02750
02751 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
02752
02753 Subtract(f, f, g, fgLen);
02754 t = Add(b, b, c, bcLen);
02755 b[bcLen] += t;
02756 bcLen += 2*t;
02757 assert(bcLen <= N);
02758 }
02759 }
02760
02761
02762
02763
02764
02765 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02766 {
02767 CopyWords(R, A, N);
02768
02769 while (k--)
02770 {
02771 if (R[0]%2==0)
02772 ShiftWordsRightByBits(R, N, 1);
02773 else
02774 {
02775 word carry = Add(R, R, M, N);
02776 ShiftWordsRightByBits(R, N, 1);
02777 R[N-1] += carry<<(WORD_BITS-1);
02778 }
02779 }
02780 }
02781
02782
02783
02784
02785
02786 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02787 {
02788 CopyWords(R, A, N);
02789
02790 while (k--)
02791 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02792 Subtract(R, R, M, N);
02793 }
02794
02795
02796
02797 InitializeInteger::InitializeInteger()
02798 {
02799 if (!g_pAssignIntToInteger)
02800 {
02801 SetFunctionPointers();
02802 g_pAssignIntToInteger = AssignIntToInteger;
02803 }
02804 }
02805
02806 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02807
02808 static inline size_t RoundupSize(size_t n)
02809 {
02810 if (n<=8)
02811 return RoundupSizeTable[n];
02812 else if (n<=16)
02813 return 16;
02814 else if (n<=32)
02815 return 32;
02816 else if (n<=64)
02817 return 64;
02818 else return size_t(1) << BitPrecision(n-1);
02819 }
02820
02821 Integer::Integer()
02822 : reg(2), sign(POSITIVE)
02823 {
02824 reg[0] = reg[1] = 0;
02825 }
02826
02827 Integer::Integer(const Integer& t)
02828 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02829 {
02830 CopyWords(reg, t.reg, reg.size());
02831 }
02832
02833 Integer::Integer(Sign s, lword value)
02834 : reg(2), sign(s)
02835 {
02836 reg[0] = word(value);
02837 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02838 }
02839
02840 Integer::Integer(signed long value)
02841 : reg(2)
02842 {
02843 if (value >= 0)
02844 sign = POSITIVE;
02845 else
02846 {
02847 sign = NEGATIVE;
02848 value = -value;
02849 }
02850 reg[0] = word(value);
02851 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02852 }
02853
02854 Integer::Integer(Sign s, word high, word low)
02855 : reg(2), sign(s)
02856 {
02857 reg[0] = low;
02858 reg[1] = high;
02859 }
02860
02861 bool Integer::IsConvertableToLong() const
02862 {
02863 if (ByteCount() > sizeof(long))
02864 return false;
02865
02866 unsigned long value = (unsigned long)reg[0];
02867 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02868
02869 if (sign==POSITIVE)
02870 return (signed long)value >= 0;
02871 else
02872 return -(signed long)value < 0;
02873 }
02874
02875 signed long Integer::ConvertToLong() const
02876 {
02877 assert(IsConvertableToLong());
02878
02879 unsigned long value = (unsigned long)reg[0];
02880 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02881 return sign==POSITIVE ? value : -(signed long)value;
02882 }
02883
02884 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02885 {
02886 Decode(encodedInteger, byteCount, s);
02887 }
02888
02889 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02890 {
02891 Decode(encodedInteger, byteCount, s);
02892 }
02893
02894 Integer::Integer(BufferedTransformation &bt)
02895 {
02896 BERDecode(bt);
02897 }
02898
02899 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02900 {
02901 Randomize(rng, bitcount);
02902 }
02903
02904 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02905 {
02906 if (!Randomize(rng, min, max, rnType, equiv, mod))
02907 throw Integer::RandomNumberNotFound();
02908 }
02909
02910 Integer Integer::Power2(size_t e)
02911 {
02912 Integer r((word)0, BitsToWords(e+1));
02913 r.SetBit(e);
02914 return r;
02915 }
02916
02917 template <long i>
02918 struct NewInteger
02919 {
02920 Integer * operator()() const
02921 {
02922 return new Integer(i);
02923 }
02924 };
02925
02926 const Integer &Integer::Zero()
02927 {
02928 return Singleton<Integer>().Ref();
02929 }
02930
02931 const Integer &Integer::One()
02932 {
02933 return Singleton<Integer, NewInteger<1> >().Ref();
02934 }
02935
02936 const Integer &Integer::Two()
02937 {
02938 return Singleton<Integer, NewInteger<2> >().Ref();
02939 }
02940
02941 bool Integer::operator!() const
02942 {
02943 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02944 }
02945
02946 Integer& Integer::operator=(const Integer& t)
02947 {
02948 if (this != &t)
02949 {
02950 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02951 reg.New(RoundupSize(t.WordCount()));
02952 CopyWords(reg, t.reg, reg.size());
02953 sign = t.sign;
02954 }
02955 return *this;
02956 }
02957
02958 bool Integer::GetBit(size_t n) const
02959 {
02960 if (n/WORD_BITS >= reg.size())
02961 return 0;
02962 else
02963 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02964 }
02965
02966 void Integer::SetBit(size_t n, bool value)
02967 {
02968 if (value)
02969 {
02970 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02971 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02972 }
02973 else
02974 {
02975 if (n/WORD_BITS < reg.size())
02976 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02977 }
02978 }
02979
02980 byte Integer::GetByte(size_t n) const
02981 {
02982 if (n/WORD_SIZE >= reg.size())
02983 return 0;
02984 else
02985 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02986 }
02987
02988 void Integer::SetByte(size_t n, byte value)
02989 {
02990 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02991 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02992 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02993 }
02994
02995 lword Integer::GetBits(size_t i, size_t n) const
02996 {
02997 lword v = 0;
02998 assert(n <= sizeof(v)*8);
02999 for (unsigned int j=0; j<n; j++)
03000 v |= lword(GetBit(i+j)) << j;
03001 return v;
03002 }
03003
03004 Integer Integer::operator-() const
03005 {
03006 Integer result(*this);
03007 result.Negate();
03008 return result;
03009 }
03010
03011 Integer Integer::AbsoluteValue() const
03012 {
03013 Integer result(*this);
03014 result.sign = POSITIVE;
03015 return result;
03016 }
03017
03018 void Integer::swap(Integer &a)
03019 {
03020 reg.swap(a.reg);
03021 std::swap(sign, a.sign);
03022 }
03023
03024 Integer::Integer(word value, size_t length)
03025 : reg(RoundupSize(length)), sign(POSITIVE)
03026 {
03027 reg[0] = value;
03028 SetWords(reg+1, 0, reg.size()-1);
03029 }
03030
03031 template <class T>
03032 static Integer StringToInteger(const T *str)
03033 {
03034 int radix;
03035
03036
03037 unsigned int length;
03038 for (length = 0; str[length] != 0; length++) {}
03039
03040 Integer v;
03041
03042 if (length == 0)
03043 return v;
03044
03045 switch (str[length-1])
03046 {
03047 case 'h':
03048 case 'H':
03049 radix=16;
03050 break;
03051 case 'o':
03052 case 'O':
03053 radix=8;
03054 break;
03055 case 'b':
03056 case 'B':
03057 radix=2;
03058 break;
03059 default:
03060 radix=10;
03061 }
03062
03063 if (length > 2 && str[0] == '0' && str[1] == 'x')
03064 radix = 16;
03065
03066 for (unsigned i=0; i<length; i++)
03067 {
03068 int digit;
03069
03070 if (str[i] >= '0' && str[i] <= '9')
03071 digit = str[i] - '0';
03072 else if (str[i] >= 'A' && str[i] <= 'F')
03073 digit = str[i] - 'A' + 10;
03074 else if (str[i] >= 'a' && str[i] <= 'f')
03075 digit = str[i] - 'a' + 10;
03076 else
03077 digit = radix;
03078
03079 if (digit < radix)
03080 {
03081 v *= radix;
03082 v += digit;
03083 }
03084 }
03085
03086 if (str[0] == '-')
03087 v.Negate();
03088
03089 return v;
03090 }
03091
03092 Integer::Integer(const char *str)
03093 : reg(2), sign(POSITIVE)
03094 {
03095 *this = StringToInteger(str);
03096 }
03097
03098 Integer::Integer(const wchar_t *str)
03099 : reg(2), sign(POSITIVE)
03100 {
03101 *this = StringToInteger(str);
03102 }
03103
03104 unsigned int Integer::WordCount() const
03105 {
03106 return (unsigned int)CountWords(reg, reg.size());
03107 }
03108
03109 unsigned int Integer::ByteCount() const
03110 {
03111 unsigned wordCount = WordCount();
03112 if (wordCount)
03113 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03114 else
03115 return 0;
03116 }
03117
03118 unsigned int Integer::BitCount() const
03119 {
03120 unsigned wordCount = WordCount();
03121 if (wordCount)
03122 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03123 else
03124 return 0;
03125 }
03126
03127 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03128 {
03129 StringStore store(input, inputLen);
03130 Decode(store, inputLen, s);
03131 }
03132
03133 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03134 {
03135 assert(bt.MaxRetrievable() >= inputLen);
03136
03137 byte b;
03138 bt.Peek(b);
03139 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03140
03141 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03142 {
03143 bt.Skip(1);
03144 inputLen--;
03145 bt.Peek(b);
03146 }
03147
03148
03149 const size_t size = RoundupSize(BytesToWords(inputLen));
03150 reg.CleanNew(size);
03151
03152 assert(reg.SizeInBytes() >= inputLen);
03153 for (size_t i=inputLen; i > 0; i--)
03154 {
03155 bt.Get(b);
03156 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03157 }
03158
03159 if (sign == NEGATIVE)
03160 {
03161 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03162 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03163 TwosComplement(reg, reg.size());
03164 }
03165 }
03166
03167 size_t Integer::MinEncodedSize(Signedness signedness) const
03168 {
03169 unsigned int outputLen = STDMAX(1U, ByteCount());
03170 if (signedness == UNSIGNED)
03171 return outputLen;
03172 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03173 outputLen++;
03174 if (IsNegative() && *this < -Power2(outputLen*8-1))
03175 outputLen++;
03176 return outputLen;
03177 }
03178
03179 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03180 {
03181 assert(output && outputLen);
03182 ArraySink sink(output, outputLen);
03183 Encode(sink, outputLen, signedness);
03184 }
03185
03186 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03187 {
03188 if (signedness == UNSIGNED || NotNegative())
03189 {
03190 for (size_t i=outputLen; i > 0; i--)
03191 bt.Put(GetByte(i-1));
03192 }
03193 else
03194 {
03195
03196 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03197 temp.Encode(bt, outputLen, UNSIGNED);
03198 }
03199 }
03200
03201 void Integer::DEREncode(BufferedTransformation &bt) const
03202 {
03203 DERGeneralEncoder enc(bt, INTEGER);
03204 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03205 enc.MessageEnd();
03206 }
03207
03208 void Integer::BERDecode(const byte *input, size_t len)
03209 {
03210 StringStore store(input, len);
03211 BERDecode(store);
03212 }
03213
03214 void Integer::BERDecode(BufferedTransformation &bt)
03215 {
03216 BERGeneralDecoder dec(bt, INTEGER);
03217 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03218 BERDecodeError();
03219 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03220 dec.MessageEnd();
03221 }
03222
03223 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03224 {
03225 DERGeneralEncoder enc(bt, OCTET_STRING);
03226 Encode(enc, length);
03227 enc.MessageEnd();
03228 }
03229
03230 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03231 {
03232 BERGeneralDecoder dec(bt, OCTET_STRING);
03233 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03234 BERDecodeError();
03235 Decode(dec, length);
03236 dec.MessageEnd();
03237 }
03238
03239 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03240 {
03241 ArraySink sink(output, len);
03242 return OpenPGPEncode(sink);
03243 }
03244
03245 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03246 {
03247 word16 bitCount = word16(BitCount());
03248 bt.PutWord16(bitCount);
03249 size_t byteCount = BitsToBytes(bitCount);
03250 Encode(bt, byteCount);
03251 return 2 + byteCount;
03252 }
03253
03254 void Integer::OpenPGPDecode(const byte *input, size_t len)
03255 {
03256 StringStore store(input, len);
03257 OpenPGPDecode(store);
03258 }
03259
03260 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03261 {
03262 word16 bitCount;
03263 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03264 throw OpenPGPDecodeErr();
03265 Decode(bt, BitsToBytes(bitCount));
03266 }
03267
03268 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03269 {
03270 const size_t nbytes = nbits/8 + 1;
03271 SecByteBlock buf(nbytes);
03272 rng.GenerateBlock(buf, nbytes);
03273 if (nbytes)
03274 buf[0] = (byte)Crop(buf[0], nbits % 8);
03275 Decode(buf, nbytes, UNSIGNED);
03276 }
03277
03278 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03279 {
03280 if (min > max)
03281 throw InvalidArgument("Integer: Min must be no greater than Max");
03282
03283 Integer range = max - min;
03284 const unsigned int nbits = range.BitCount();
03285
03286 do
03287 {
03288 Randomize(rng, nbits);
03289 }
03290 while (*this > range);
03291
03292 *this += min;
03293 }
03294
03295 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03296 {
03297 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03298 }
03299
03300 class KDF2_RNG : public RandomNumberGenerator
03301 {
03302 public:
03303 KDF2_RNG(const byte *seed, size_t seedSize)
03304 : m_counter(0), m_counterAndSeed(seedSize + 4)
03305 {
03306 memcpy(m_counterAndSeed + 4, seed, seedSize);
03307 }
03308
03309 void GenerateBlock(byte *output, size_t size)
03310 {
03311 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03312 ++m_counter;
03313 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03314 }
03315
03316 private:
03317 word32 m_counter;
03318 SecByteBlock m_counterAndSeed;
03319 };
03320
03321 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03322 {
03323 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03324 Integer max;
03325 if (!params.GetValue("Max", max))
03326 {
03327 int bitLength;
03328 if (params.GetIntValue("BitLength", bitLength))
03329 max = Integer::Power2(bitLength);
03330 else
03331 throw InvalidArgument("Integer: missing Max argument");
03332 }
03333 if (min > max)
03334 throw InvalidArgument("Integer: Min must be no greater than Max");
03335
03336 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03337 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03338
03339 if (equiv.IsNegative() || equiv >= mod)
03340 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03341
03342 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03343
03344 member_ptr<KDF2_RNG> kdf2Rng;
03345 ConstByteArrayParameter seed;
03346 if (params.GetValue(Name::Seed(), seed))
03347 {
03348 ByteQueue bq;
03349 DERSequenceEncoder seq(bq);
03350 min.DEREncode(seq);
03351 max.DEREncode(seq);
03352 equiv.DEREncode(seq);
03353 mod.DEREncode(seq);
03354 DEREncodeUnsigned(seq, rnType);
03355 DEREncodeOctetString(seq, seed.begin(), seed.size());
03356 seq.MessageEnd();
03357
03358 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03359 bq.Get(finalSeed, finalSeed.size());
03360 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03361 }
03362 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03363
03364 switch (rnType)
03365 {
03366 case ANY:
03367 if (mod == One())
03368 Randomize(rng, min, max);
03369 else
03370 {
03371 Integer min1 = min + (equiv-min)%mod;
03372 if (max < min1)
03373 return false;
03374 Randomize(rng, Zero(), (max - min1) / mod);
03375 *this *= mod;
03376 *this += min1;
03377 }
03378 return true;
03379
03380 case PRIME:
03381 {
03382 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03383
03384 int i;
03385 i = 0;
03386 while (1)
03387 {
03388 if (++i==16)
03389 {
03390
03391 Integer first = min;
03392 if (FirstPrime(first, max, equiv, mod, pSelector))
03393 {
03394
03395 *this = first;
03396 if (!FirstPrime(first, max, equiv, mod, pSelector))
03397 return true;
03398 }
03399 else
03400 return false;
03401 }
03402
03403 Randomize(rng, min, max);
03404 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03405 return true;
03406 }
03407 }
03408
03409 default:
03410 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03411 }
03412 }
03413
03414 std::istream& operator>>(std::istream& in, Integer &a)
03415 {
03416 char c;
03417 unsigned int length = 0;
03418 SecBlock<char> str(length + 16);
03419
03420 std::ws(in);
03421
03422 do
03423 {
03424 in.read(&c, 1);
03425 str[length++] = c;
03426 if (length >= str.size())
03427 str.Grow(length + 16);
03428 }
03429 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03430
03431 if (in.gcount())
03432 in.putback(c);
03433 str[length-1] = '\0';
03434 a = Integer(str);
03435
03436 return in;
03437 }
03438
03439 std::ostream& operator<<(std::ostream& out, const Integer &a)
03440 {
03441
03442 const long f = out.flags() & std::ios::basefield;
03443 int base, block;
03444 char suffix;
03445 switch(f)
03446 {
03447 case std::ios::oct :
03448 base = 8;
03449 block = 8;
03450 suffix = 'o';
03451 break;
03452 case std::ios::hex :
03453 base = 16;
03454 block = 4;
03455 suffix = 'h';
03456 break;
03457 default :
03458 base = 10;
03459 block = 3;
03460 suffix = '.';
03461 }
03462
03463 Integer temp1=a, temp2;
03464
03465 if (a.IsNegative())
03466 {
03467 out << '-';
03468 temp1.Negate();
03469 }
03470
03471 if (!a)
03472 out << '0';
03473
03474 static const char upper[]="0123456789ABCDEF";
03475 static const char lower[]="0123456789abcdef";
03476
03477 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
03478 unsigned int i=0;
03479 SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
03480
03481 while (!!temp1)
03482 {
03483 word digit;
03484 Integer::Divide(digit, temp2, temp1, base);
03485 s[i++]=vec[digit];
03486 temp1.swap(temp2);
03487 }
03488
03489 while (i--)
03490 {
03491 out << s[i];
03492
03493
03494 }
03495
03496 return out << suffix;
03497 }
03498
03499 Integer& Integer::operator++()
03500 {
03501 if (NotNegative())
03502 {
03503 if (Increment(reg, reg.size()))
03504 {
03505 reg.CleanGrow(2*reg.size());
03506 reg[reg.size()/2]=1;
03507 }
03508 }
03509 else
03510 {
03511 word borrow = Decrement(reg, reg.size());
03512 assert(!borrow);
03513 CRYPTOPP_UNUSED(borrow);
03514
03515 if (WordCount()==0)
03516 *this = Zero();
03517 }
03518 return *this;
03519 }
03520
03521 Integer& Integer::operator--()
03522 {
03523 if (IsNegative())
03524 {
03525 if (Increment(reg, reg.size()))
03526 {
03527 reg.CleanGrow(2*reg.size());
03528 reg[reg.size()/2]=1;
03529 }
03530 }
03531 else
03532 {
03533 if (Decrement(reg, reg.size()))
03534 *this = -One();
03535 }
03536 return *this;
03537 }
03538
03539 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03540 {
03541 int carry;
03542 if (a.reg.size() == b.reg.size())
03543 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03544 else if (a.reg.size() > b.reg.size())
03545 {
03546 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03547 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03548 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03549 }
03550 else
03551 {
03552 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03553 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03554 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03555 }
03556
03557 if (carry)
03558 {
03559 sum.reg.CleanGrow(2*sum.reg.size());
03560 sum.reg[sum.reg.size()/2] = 1;
03561 }
03562 sum.sign = Integer::POSITIVE;
03563 }
03564
03565 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03566 {
03567 unsigned aSize = a.WordCount();
03568 aSize += aSize%2;
03569 unsigned bSize = b.WordCount();
03570 bSize += bSize%2;
03571
03572 if (aSize == bSize)
03573 {
03574 if (Compare(a.reg, b.reg, aSize) >= 0)
03575 {
03576 Subtract(diff.reg, a.reg, b.reg, aSize);
03577 diff.sign = Integer::POSITIVE;
03578 }
03579 else
03580 {
03581 Subtract(diff.reg, b.reg, a.reg, aSize);
03582 diff.sign = Integer::NEGATIVE;
03583 }
03584 }
03585 else if (aSize > bSize)
03586 {
03587 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03588 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03589 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03590 assert(!borrow);
03591 diff.sign = Integer::POSITIVE;
03592 }
03593 else
03594 {
03595 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03596 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03597 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03598 assert(!borrow);
03599 diff.sign = Integer::NEGATIVE;
03600 }
03601 }
03602
03603
03604 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03605 {
03606 return a < b ? b : a;
03607 }
03608
03609 Integer Integer::Plus(const Integer& b) const
03610 {
03611 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03612 if (NotNegative())
03613 {
03614 if (b.NotNegative())
03615 PositiveAdd(sum, *this, b);
03616 else
03617 PositiveSubtract(sum, *this, b);
03618 }
03619 else
03620 {
03621 if (b.NotNegative())
03622 PositiveSubtract(sum, b, *this);
03623 else
03624 {
03625 PositiveAdd(sum, *this, b);
03626 sum.sign = Integer::NEGATIVE;
03627 }
03628 }
03629 return sum;
03630 }
03631
03632 Integer& Integer::operator+=(const Integer& t)
03633 {
03634 reg.CleanGrow(t.reg.size());
03635 if (NotNegative())
03636 {
03637 if (t.NotNegative())
03638 PositiveAdd(*this, *this, t);
03639 else
03640 PositiveSubtract(*this, *this, t);
03641 }
03642 else
03643 {
03644 if (t.NotNegative())
03645 PositiveSubtract(*this, t, *this);
03646 else
03647 {
03648 PositiveAdd(*this, *this, t);
03649 sign = Integer::NEGATIVE;
03650 }
03651 }
03652 return *this;
03653 }
03654
03655 Integer Integer::Minus(const Integer& b) const
03656 {
03657 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03658 if (NotNegative())
03659 {
03660 if (b.NotNegative())
03661 PositiveSubtract(diff, *this, b);
03662 else
03663 PositiveAdd(diff, *this, b);
03664 }
03665 else
03666 {
03667 if (b.NotNegative())
03668 {
03669 PositiveAdd(diff, *this, b);
03670 diff.sign = Integer::NEGATIVE;
03671 }
03672 else
03673 PositiveSubtract(diff, b, *this);
03674 }
03675 return diff;
03676 }
03677
03678 Integer& Integer::operator-=(const Integer& t)
03679 {
03680 reg.CleanGrow(t.reg.size());
03681 if (NotNegative())
03682 {
03683 if (t.NotNegative())
03684 PositiveSubtract(*this, *this, t);
03685 else
03686 PositiveAdd(*this, *this, t);
03687 }
03688 else
03689 {
03690 if (t.NotNegative())
03691 {
03692 PositiveAdd(*this, *this, t);
03693 sign = Integer::NEGATIVE;
03694 }
03695 else
03696 PositiveSubtract(*this, t, *this);
03697 }
03698 return *this;
03699 }
03700
03701 Integer& Integer::operator<<=(size_t n)
03702 {
03703 const size_t wordCount = WordCount();
03704 const size_t shiftWords = n / WORD_BITS;
03705 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03706
03707 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03708 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03709 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03710 return *this;
03711 }
03712
03713 Integer& Integer::operator>>=(size_t n)
03714 {
03715 const size_t wordCount = WordCount();
03716 const size_t shiftWords = n / WORD_BITS;
03717 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03718
03719 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03720 if (wordCount > shiftWords)
03721 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03722 if (IsNegative() && WordCount()==0)
03723 *this = Zero();
03724 return *this;
03725 }
03726
03727 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03728 {
03729 size_t aSize = RoundupSize(a.WordCount());
03730 size_t bSize = RoundupSize(b.WordCount());
03731
03732 product.reg.CleanNew(RoundupSize(aSize+bSize));
03733 product.sign = Integer::POSITIVE;
03734
03735 IntegerSecBlock workspace(aSize + bSize);
03736 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03737 }
03738
03739 void Multiply(Integer &product, const Integer &a, const Integer &b)
03740 {
03741 PositiveMultiply(product, a, b);
03742
03743 if (a.NotNegative() != b.NotNegative())
03744 product.Negate();
03745 }
03746
03747 Integer Integer::Times(const Integer &b) const
03748 {
03749 Integer product;
03750 Multiply(product, *this, b);
03751 return product;
03752 }
03753
03754
03755
03756
03757
03758
03759
03760
03761
03762
03763
03764
03765
03766
03767
03768
03769
03770
03771
03772
03773
03774
03775
03776 void PositiveDivide(Integer &remainder, Integer "ient,
03777 const Integer &a, const Integer &b)
03778 {
03779 unsigned aSize = a.WordCount();
03780 unsigned bSize = b.WordCount();
03781
03782 if (!bSize)
03783 throw Integer::DivideByZero();
03784
03785 if (aSize < bSize)
03786 {
03787 remainder = a;
03788 remainder.sign = Integer::POSITIVE;
03789 quotient = Integer::Zero();
03790 return;
03791 }
03792
03793 aSize += aSize%2;
03794 bSize += bSize%2;
03795
03796 remainder.reg.CleanNew(RoundupSize(bSize));
03797 remainder.sign = Integer::POSITIVE;
03798 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03799 quotient.sign = Integer::POSITIVE;
03800
03801 IntegerSecBlock T(aSize+3*(bSize+2));
03802 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03803 }
03804
03805 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03806 {
03807 PositiveDivide(remainder, quotient, dividend, divisor);
03808
03809 if (dividend.IsNegative())
03810 {
03811 quotient.Negate();
03812 if (remainder.NotZero())
03813 {
03814 --quotient;
03815 remainder = divisor.AbsoluteValue() - remainder;
03816 }
03817 }
03818
03819 if (divisor.IsNegative())
03820 quotient.Negate();
03821 }
03822
03823 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03824 {
03825 q = a;
03826 q >>= n;
03827
03828 const size_t wordCount = BitsToWords(n);
03829 if (wordCount <= a.WordCount())
03830 {
03831 r.reg.resize(RoundupSize(wordCount));
03832 CopyWords(r.reg, a.reg, wordCount);
03833 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03834 if (n % WORD_BITS != 0)
03835 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03836 }
03837 else
03838 {
03839 r.reg.resize(RoundupSize(a.WordCount()));
03840 CopyWords(r.reg, a.reg, r.reg.size());
03841 }
03842 r.sign = POSITIVE;
03843
03844 if (a.IsNegative() && r.NotZero())
03845 {
03846 --q;
03847 r = Power2(n) - r;
03848 }
03849 }
03850
03851 Integer Integer::DividedBy(const Integer &b) const
03852 {
03853 Integer remainder, quotient;
03854 Integer::Divide(remainder, quotient, *this, b);
03855 return quotient;
03856 }
03857
03858 Integer Integer::Modulo(const Integer &b) const
03859 {
03860 Integer remainder, quotient;
03861 Integer::Divide(remainder, quotient, *this, b);
03862 return remainder;
03863 }
03864
03865 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03866 {
03867 if (!divisor)
03868 throw Integer::DivideByZero();
03869
03870 assert(divisor);
03871
03872 if ((divisor & (divisor-1)) == 0)
03873 {
03874 quotient = dividend >> (BitPrecision(divisor)-1);
03875 remainder = dividend.reg[0] & (divisor-1);
03876 return;
03877 }
03878
03879 unsigned int i = dividend.WordCount();
03880 quotient.reg.CleanNew(RoundupSize(i));
03881 remainder = 0;
03882 while (i--)
03883 {
03884 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03885 remainder = DWord(dividend.reg[i], remainder) % divisor;
03886 }
03887
03888 if (dividend.NotNegative())
03889 quotient.sign = POSITIVE;
03890 else
03891 {
03892 quotient.sign = NEGATIVE;
03893 if (remainder)
03894 {
03895 --quotient;
03896 remainder = divisor - remainder;
03897 }
03898 }
03899 }
03900
03901 Integer Integer::DividedBy(word b) const
03902 {
03903 word remainder;
03904 Integer quotient;
03905 Integer::Divide(remainder, quotient, *this, b);
03906 return quotient;
03907 }
03908
03909 word Integer::Modulo(word divisor) const
03910 {
03911 if (!divisor)
03912 throw Integer::DivideByZero();
03913
03914 assert(divisor);
03915
03916 word remainder;
03917
03918 if ((divisor & (divisor-1)) == 0)
03919 remainder = reg[0] & (divisor-1);
03920 else
03921 {
03922 unsigned int i = WordCount();
03923
03924 if (divisor <= 5)
03925 {
03926 DWord sum(0, 0);
03927 while (i--)
03928 sum += reg[i];
03929 remainder = sum % divisor;
03930 }
03931 else
03932 {
03933 remainder = 0;
03934 while (i--)
03935 remainder = DWord(reg[i], remainder) % divisor;
03936 }
03937 }
03938
03939 if (IsNegative() && remainder)
03940 remainder = divisor - remainder;
03941
03942 return remainder;
03943 }
03944
03945 void Integer::Negate()
03946 {
03947 if (!!(*this))
03948 sign = Sign(1-sign);
03949 }
03950
03951 int Integer::PositiveCompare(const Integer& t) const
03952 {
03953 unsigned size = WordCount(), tSize = t.WordCount();
03954
03955 if (size == tSize)
03956 return CryptoPP::Compare(reg, t.reg, size);
03957 else
03958 return size > tSize ? 1 : -1;
03959 }
03960
03961 int Integer::Compare(const Integer& t) const
03962 {
03963 if (NotNegative())
03964 {
03965 if (t.NotNegative())
03966 return PositiveCompare(t);
03967 else
03968 return 1;
03969 }
03970 else
03971 {
03972 if (t.NotNegative())
03973 return -1;
03974 else
03975 return -PositiveCompare(t);
03976 }
03977 }
03978
03979 Integer Integer::SquareRoot() const
03980 {
03981 if (!IsPositive())
03982 return Zero();
03983
03984
03985 Integer x, y = Power2((BitCount()+1)/2);
03986 assert(y*y >= *this);
03987
03988 do
03989 {
03990 x = y;
03991 y = (x + *this/x) >> 1;
03992 } while (y<x);
03993
03994 return x;
03995 }
03996
03997 bool Integer::IsSquare() const
03998 {
03999 Integer r = SquareRoot();
04000 return *this == r.Squared();
04001 }
04002
04003 bool Integer::IsUnit() const
04004 {
04005 return (WordCount() == 1) && (reg[0] == 1);
04006 }
04007
04008 Integer Integer::MultiplicativeInverse() const
04009 {
04010 return IsUnit() ? *this : Zero();
04011 }
04012
04013 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
04014 {
04015 return x*y%m;
04016 }
04017
04018 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
04019 {
04020 ModularArithmetic mr(m);
04021 return mr.Exponentiate(x, e);
04022 }
04023
04024 Integer Integer::Gcd(const Integer &a, const Integer &b)
04025 {
04026 return EuclideanDomainOf<Integer>().Gcd(a, b);
04027 }
04028
04029 Integer Integer::InverseMod(const Integer &m) const
04030 {
04031 assert(m.NotNegative());
04032
04033 if (IsNegative())
04034 return Modulo(m).InverseMod(m);
04035
04036 if (m.IsEven())
04037 {
04038 if (!m || IsEven())
04039 return Zero();
04040 if (*this == One())
04041 return One();
04042
04043 Integer u = m.Modulo(*this).InverseMod(*this);
04044 return !u ? Zero() : (m*(*this-u)+1)/(*this);
04045 }
04046
04047 SecBlock<word> T(m.reg.size() * 4);
04048 Integer r((word)0, m.reg.size());
04049 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
04050 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
04051 return r;
04052 }
04053
04054 word Integer::InverseMod(word mod) const
04055 {
04056 word g0 = mod, g1 = *this % mod;
04057 word v0 = 0, v1 = 1;
04058 word y;
04059
04060 while (g1)
04061 {
04062 if (g1 == 1)
04063 return v1;
04064 y = g0 / g1;
04065 g0 = g0 % g1;
04066 v0 += y * v1;
04067
04068 if (!g0)
04069 break;
04070 if (g0 == 1)
04071 return mod-v0;
04072 y = g1 / g0;
04073 g1 = g1 % g0;
04074 v1 += y * v0;
04075 }
04076 return 0;
04077 }
04078
04079
04080
04081 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04082 {
04083 BERSequenceDecoder seq(bt);
04084 OID oid(seq);
04085 if (oid != ASN1::prime_field())
04086 BERDecodeError();
04087 m_modulus.BERDecode(seq);
04088 seq.MessageEnd();
04089 m_result.reg.resize(m_modulus.reg.size());
04090 }
04091
04092 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04093 {
04094 DERSequenceEncoder seq(bt);
04095 ASN1::prime_field().DEREncode(seq);
04096 m_modulus.DEREncode(seq);
04097 seq.MessageEnd();
04098 }
04099
04100 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04101 {
04102 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04103 }
04104
04105 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04106 {
04107 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04108 }
04109
04110 const Integer& ModularArithmetic::Half(const Integer &a) const
04111 {
04112 if (a.reg.size()==m_modulus.reg.size())
04113 {
04114 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04115 return m_result;
04116 }
04117 else
04118 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04119 }
04120
04121 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04122 {
04123 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04124 {
04125 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04126 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04127 {
04128 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04129 }
04130 return m_result;
04131 }
04132 else
04133 {
04134 m_result1 = a+b;
04135 if (m_result1 >= m_modulus)
04136 m_result1 -= m_modulus;
04137 return m_result1;
04138 }
04139 }
04140
04141 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04142 {
04143 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04144 {
04145 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04146 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04147 {
04148 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04149 }
04150 }
04151 else
04152 {
04153 a+=b;
04154 if (a>=m_modulus)
04155 a-=m_modulus;
04156 }
04157
04158 return a;
04159 }
04160
04161 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04162 {
04163 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04164 {
04165 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04166 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04167 return m_result;
04168 }
04169 else
04170 {
04171 m_result1 = a-b;
04172 if (m_result1.IsNegative())
04173 m_result1 += m_modulus;
04174 return m_result1;
04175 }
04176 }
04177
04178 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04179 {
04180 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04181 {
04182 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04183 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04184 }
04185 else
04186 {
04187 a-=b;
04188 if (a.IsNegative())
04189 a+=m_modulus;
04190 }
04191
04192 return a;
04193 }
04194
04195 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04196 {
04197 if (!a)
04198 return a;
04199
04200 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04201 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04202 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04203
04204 return m_result;
04205 }
04206
04207 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04208 {
04209 if (m_modulus.IsOdd())
04210 {
04211 MontgomeryRepresentation dr(m_modulus);
04212 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04213 }
04214 else
04215 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04216 }
04217
04218 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04219 {
04220 if (m_modulus.IsOdd())
04221 {
04222 MontgomeryRepresentation dr(m_modulus);
04223 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04224 for (unsigned int i=0; i<exponentsCount; i++)
04225 results[i] = dr.ConvertOut(results[i]);
04226 }
04227 else
04228 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04229 }
04230
04231 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04232 : ModularArithmetic(m),
04233 m_u((word)0, m_modulus.reg.size()),
04234 m_workspace(5*m_modulus.reg.size())
04235 {
04236 if (!m_modulus.IsOdd())
04237 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04238
04239 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04240 }
04241
04242 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04243 {
04244 word *const T = m_workspace.begin();
04245 word *const R = m_result.reg.begin();
04246 const size_t N = m_modulus.reg.size();
04247 assert(a.reg.size()<=N && b.reg.size()<=N);
04248
04249 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04250 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04251 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04252 return m_result;
04253 }
04254
04255 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04256 {
04257 word *const T = m_workspace.begin();
04258 word *const R = m_result.reg.begin();
04259 const size_t N = m_modulus.reg.size();
04260 assert(a.reg.size()<=N);
04261
04262 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04263 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04264 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04265 return m_result;
04266 }
04267
04268 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04269 {
04270 word *const T = m_workspace.begin();
04271 word *const R = m_result.reg.begin();
04272 const size_t N = m_modulus.reg.size();
04273 assert(a.reg.size()<=N);
04274
04275 CopyWords(T, a.reg, a.reg.size());
04276 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04277 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04278 return m_result;
04279 }
04280
04281 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04282 {
04283
04284 word *const T = m_workspace.begin();
04285 word *const R = m_result.reg.begin();
04286 const size_t N = m_modulus.reg.size();
04287 assert(a.reg.size()<=N);
04288
04289 CopyWords(T, a.reg, a.reg.size());
04290 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04291 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04292 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04293
04294
04295
04296 if (k>N*WORD_BITS)
04297 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04298 else
04299 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04300
04301 return m_result;
04302 }
04303
04304
04305
04306 template <> CRYPTOPP_DLL
04307 std::string IntToString<Integer>(Integer value, unsigned int base)
04308 {
04309
04310 static const unsigned int BIT_32 = (1U << 31);
04311 const bool UPPER = !!(base & BIT_32);
04312 static const unsigned int BIT_31 = (1U << 30);
04313 const bool BASE = !!(base & BIT_31);
04314
04315 const char CH = UPPER ? 'A' : 'a';
04316 base &= ~(BIT_32|BIT_31);
04317 assert(base >= 2 && base <= 32);
04318
04319 if (value == 0)
04320 return "0";
04321
04322 bool negative = false, zero = false;
04323 if (value.IsNegative())
04324 {
04325 negative = true;
04326 value.Negate();
04327 }
04328
04329 if (!value)
04330 zero = true;
04331
04332 SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
04333 Integer temp;
04334
04335 unsigned int i=0;
04336 while (!!value)
04337 {
04338 word digit;
04339 Integer::Divide(digit, temp, value, word(base));
04340 s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
04341 value.swap(temp);
04342 }
04343
04344 std::string result;
04345 result.reserve(i+2);
04346
04347 if (negative)
04348 result += '-';
04349
04350 if (zero)
04351 result += '0';
04352
04353 while (i--)
04354 result += s[i];
04355
04356 if (BASE)
04357 {
04358 if (base == 10)
04359 result += '.';
04360 else if (base == 16)
04361 result += 'h';
04362 else if (base == 8)
04363 result += 'o';
04364 else if (base == 2)
04365 result += 'b';
04366 }
04367
04368 return result;
04369 }
04370
04371
04372 template <> CRYPTOPP_DLL
04373 std::string IntToString<unsigned long long>(unsigned long long value, unsigned int base)
04374 {
04375
04376 static const unsigned int HIGH_BIT = (1U << 31);
04377 const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
04378 base &= ~HIGH_BIT;
04379
04380 assert(base >= 2);
04381 if (value == 0)
04382 return "0";
04383
04384 std::string result;
04385 while (value > 0)
04386 {
04387 unsigned long long digit = value % base;
04388 result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
04389 value /= base;
04390 }
04391 return result;
04392 }
04393
04394 NAMESPACE_END
04395
04396 #if WORKAROUND_ARMEL_BUG
04397 # pragma GCC pop_options
04398 #endif
04399
04400 #if WORKAROUND_ARM64_BUG
04401 # pragma GCC pop_options
04402 #endif
04403
04404 #endif